From 68de81481d15c4c1d3f7431261add3f97a9d27d8 Mon Sep 17 00:00:00 2001 From: mcamou Date: Tue, 26 Aug 2025 15:37:29 +0200 Subject: [PATCH 1/6] Add GetStructuredCapabilities to RedditScraper --- internal/capabilities/detector_test.go | 6 ------ internal/jobs/reddit.go | 13 +++++++++++++ internal/jobserver/jobserver.go | 21 +++++++-------------- internal/jobserver/worker.go | 2 ++ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/internal/capabilities/detector_test.go b/internal/capabilities/detector_test.go index eb26d9f5..7f9e1035 100644 --- a/internal/capabilities/detector_test.go +++ b/internal/capabilities/detector_test.go @@ -125,12 +125,6 @@ func TestDetectCapabilities(t *testing.T) { } } -// Helper function to check if a job type exists in capabilities -func hasJobType(capabilities teetypes.WorkerCapabilities, jobName string) bool { - _, exists := capabilities[teetypes.JobType(jobName)] - return exists -} - func TestDetectCapabilities_ScraperTypes(t *testing.T) { tests := []struct { name string diff --git a/internal/jobs/reddit.go b/internal/jobs/reddit.go index c08fc9c8..f519bc33 100644 --- a/internal/jobs/reddit.go +++ b/internal/jobs/reddit.go @@ -111,3 +111,16 @@ func processRedditResponse(j types.Job, resp []*reddit.Response, cursor client.C NextCursor: cursor.String(), }, nil } + +// GetStructuredCapabilities returns the structured capabilities supported by this Reddit scraper +// based on the available Apify API key +func (rs *RedditScraper) GetStructuredCapabilities() teetypes.WorkerCapabilities { + capabilities := make(teetypes.WorkerCapabilities) + + // Add Apify-specific capabilities based on available API key + if rs.configuration.ApifyApiKey != "" { + capabilities[teetypes.RedditJob] = teetypes.RedditCaps + } + + return capabilities +} diff --git a/internal/jobserver/jobserver.go b/internal/jobserver/jobserver.go index 58abd86e..7030439e 100644 --- a/internal/jobserver/jobserver.go +++ b/internal/jobserver/jobserver.go @@ -151,26 +151,19 @@ func NewJobServer(workers int, jc types.JobConfiguration) *JobServer { return js } -// CapabilityProvider is an interface for workers that can report their capabilities -type CapabilityProvider interface { - GetStructuredCapabilities() teetypes.WorkerCapabilities -} - // GetWorkerCapabilities returns the structured capabilities for all registered workers func (js *JobServer) GetWorkerCapabilities() teetypes.WorkerCapabilities { // Use a map to deduplicate capabilities by job type jobTypeCapMap := make(map[teetypes.JobType]map[teetypes.Capability]struct{}) for _, workerEntry := range js.jobWorkers { - if provider, ok := workerEntry.w.(CapabilityProvider); ok { - workerCapabilities := provider.GetStructuredCapabilities() - for jobType, capabilities := range workerCapabilities { - if _, exists := jobTypeCapMap[jobType]; !exists { - jobTypeCapMap[jobType] = make(map[teetypes.Capability]struct{}) - } - for _, capability := range capabilities { - jobTypeCapMap[jobType][capability] = struct{}{} - } + workerCapabilities := workerEntry.w.GetStructuredCapabilities() + for jobType, capabilities := range workerCapabilities { + if _, exists := jobTypeCapMap[jobType]; !exists { + jobTypeCapMap[jobType] = make(map[teetypes.Capability]struct{}) + } + for _, capability := range capabilities { + jobTypeCapMap[jobType][capability] = struct{}{} } } } diff --git a/internal/jobserver/worker.go b/internal/jobserver/worker.go index ec55110f..c9458c2b 100644 --- a/internal/jobserver/worker.go +++ b/internal/jobserver/worker.go
-4,6 +4,7 @@ import ( "context" "fmt" + teetypes "github.com/masa-finance/tee-types/types" "github.com/masa-finance/tee-worker/api/types" "github.com/sirupsen/logrus" ) @@ -25,6 +26,7 @@ func (js *JobServer) worker(c context.Context) { } type worker interface { + GetStructuredCapabilities() teetypes.WorkerCapabilities ExecuteJob(j types.Job) (types.JobResult, error) } From c2669e81b77cd7eb3ab21b7ebf42ab44f90bd222 Mon Sep 17 00:00:00 2001 From: mcamou Date: Tue, 26 Aug 2025 18:20:06 +0200 Subject: [PATCH 2/6] Add ApifyApiKey TODOs --- README.md | 2 +- internal/capabilities/detector.go | 1 + internal/jobs/reddit.go | 1 + internal/jobs/twitter.go | 4 ++++ internal/jobserver/jobserver_test.go | 22 ++++++++++++---------- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index c8850862..1eccb680 100644 --- a/README.md +++ b/README.md @@ -612,7 +612,7 @@ func main() { // Step 1: Create the job request job := types.Job{ - Type: "web-scraper", + Type: "web", Arguments: map[string]interface{}{ "url": "https://google.com", "depth": 1, diff --git a/internal/capabilities/detector.go b/internal/capabilities/detector.go index 32720507..2eef55e5 100644 --- a/internal/capabilities/detector.go +++ b/internal/capabilities/detector.go @@ -58,6 +58,7 @@ func DetectCapabilities(jc types.JobConfiguration, jobServer JobServerInterface) } // Add Apify-specific capabilities based on available API key + // TODO: We should verify whether each of the actors is actually available through this API key if hasApifyKey { capabilities[teetypes.TwitterApifyJob] = teetypes.TwitterApifyCaps capabilities[teetypes.RedditJob] = teetypes.RedditCaps diff --git a/internal/jobs/reddit.go b/internal/jobs/reddit.go index f519bc33..285c19f7 100644 --- a/internal/jobs/reddit.go +++ b/internal/jobs/reddit.go @@ -118,6 +118,7 @@ func (rs *RedditScraper) GetStructuredCapabilities() teetypes.WorkerCapabilities capabilities := make(teetypes.WorkerCapabilities) // Add Apify-specific capabilities based on available API key + // TODO: We should verify whether each of the actors is actually available through this API key if rs.configuration.ApifyApiKey != "" { capabilities[teetypes.RedditJob] = teetypes.RedditCaps } diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 116671b4..cfb59eab 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -147,6 +147,7 @@ func (ts *TwitterScraper) getApiScraper(j types.Job) (*twitterx.TwitterXScraper, // getApifyScraper returns an Apify client func (ts *TwitterScraper) getApifyScraper(j types.Job) (*twitterapify.TwitterApifyClient, error) { + // TODO: We should verify whether each of the actors is actually available through this API key if ts.configuration.ApifyApiKey == "" { ts.statsCollector.Add(j.WorkerID, stats.TwitterAuthErrors, 1) return nil, fmt.Errorf("no Apify API key available") @@ -980,6 +981,7 @@ func NewTwitterScraper(jc types.JobConfiguration, c *stats.StatsCollector) *Twit accountManager.DetectAllApiKeyTypes() // Validate Apify API key at startup if provided (similar to API key detection) + // TODO: We should verify whether each of the actors is actually available through this API key if config.ApifyApiKey != "" { apifyScraper, err := twitterapify.NewTwitterApifyClient(config.ApifyApiKey) if err != nil { @@ -1058,6 +1060,7 @@ func (ts *TwitterScraper) GetStructuredCapabilities() teetypes.WorkerCapabilitie } // Add Apify-specific capabilities based on available API key + // TODO: We should verify whether each of the actors is 
actually available through this API key if ts.configuration.ApifyApiKey != "" { capabilities[teetypes.TwitterApifyJob] = teetypes.TwitterApifyCaps } @@ -1181,6 +1184,7 @@ func (s *DefaultScrapeStrategy) Execute(j types.Job, ts *TwitterScraper, jobArgs switch capability { case teetypes.CapGetFollowers, teetypes.CapGetFollowing: // Priority: Apify > Credentials for general TwitterJob + // TODO: We should verify whether each of the actors is actually available through this API key if ts.configuration.ApifyApiKey != "" { // Use Apify strategy apifyStrategy := &ApifyScrapeStrategy{} diff --git a/internal/jobserver/jobserver_test.go b/internal/jobserver/jobserver_test.go index 3a133e8c..4a57af48 100644 --- a/internal/jobserver/jobserver_test.go +++ b/internal/jobserver/jobserver_test.go @@ -8,6 +8,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + teetypes "github.com/masa-finance/tee-types/types" + "github.com/masa-finance/tee-worker/api/types" "github.com/masa-finance/tee-worker/internal/config" . "github.com/masa-finance/tee-worker/internal/jobserver" @@ -22,8 +24,8 @@ var _ = Describe("Jobserver", func() { jobserver := NewJobServer(2, types.JobConfiguration{}) uuid, err := jobserver.AddJob(types.Job{ - Type: "web-scraper", - Arguments: map[string]interface{}{ + Type: teetypes.WebJob, + Arguments: map[string]any{ "url": "google", }, }) @@ -49,8 +51,8 @@ var _ = Describe("Jobserver", func() { jobserver := NewJobServer(2, types.JobConfiguration{}) uuid, err := jobserver.AddJob(types.Job{ - Type: "web-scraper", - Arguments: map[string]interface{}{ + Type: teetypes.WebJob, + Arguments: map[string]any{ "url": "google", }, Nonce: "1234567890", @@ -62,9 +64,9 @@ var _ = Describe("Jobserver", func() { Expect(err.Error()).To(ContainSubstring("this job is not from a whitelisted miner")) uuid, err = jobserver.AddJob(types.Job{ - Type: "web-scraper", + Type: teetypes.WebJob, WorkerID: "miner1", - Arguments: map[string]interface{}{ + Arguments: map[string]any{ "url": "google", }, Nonce: "1234567891", @@ -79,8 +81,8 @@ var _ = Describe("Jobserver", func() { jobserver := NewJobServer(2, types.JobConfiguration{}) uuid, err := jobserver.AddJob(types.Job{ - Type: "web-scraper", - Arguments: map[string]interface{}{ + Type: teetypes.WebJob, + Arguments: map[string]any{ "url": "google", }, Nonce: "1234567890", @@ -94,8 +96,8 @@ var _ = Describe("Jobserver", func() { Expect(exists).ToNot(BeTrue()) uuid, err = jobserver.AddJob(types.Job{ - Type: "web-scraper", - Arguments: map[string]interface{}{ + Type: teetypes.WebJob, + Arguments: map[string]any{ "url": "google", }, Nonce: "1234567890", From 2c0f387d77510cb0b37c559a4a31d72ac83f8674 Mon Sep 17 00:00:00 2001 From: mcamou Date: Wed, 27 Aug 2025 14:38:19 +0200 Subject: [PATCH 3/6] Simplify Reddit scrapeurls parameters and only allow post or comment URLs --- README.md | 14 +++++-------- api/types/reddit/reddit.go | 40 +------------------------------------- go.mod | 2 +- 3 files changed, 7 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 1eccb680..db859da6 100644 --- a/README.md +++ b/README.md @@ -209,14 +209,16 @@ Transcribes TikTok videos to text. There are four different types of Reddit searches: -- `scrapeurls`: Gets the content of one or more Reddit URLs +- `scrapeurls`: Gets the content of one or more Reddit URLs. These must be post or comment URLs (no communities or users). 
- `searchposts`: Searches posts and comments - `searchusers`: Searches user profiles - `searchcommunities`: Searches communities **Parameters** (all are optional except where noted) -- `urls` (array of object with `url` and `query` keys, required for `scrapeurls`): Each element contains a Reddit URL to scrape together with the method (which by default will be `"GET"`). +**Note:** Only one of `urls` or `queries` may be provided, depending on the query type. + +- `urls` (array of string, required for `scrapeurls`): Each element contains a Reddit URL to scrape. Only Reddit post and comment URLs are allowed (e.g. `https://reddit.com/r/<subreddit>/comments/...`). - `queries` (array of string, required for all job types except `scrapeurls`): Each element is a string to search for. - `sort` (string) What to order by. Possible values are `"relevance"`, `"hot"`, `"top"`, `"new"`, `"rising"` and `"comments"`. - `include_nsfw` (boolean): Whether to include content tagged NSFW. Default is `false`. @@ -240,13 +242,7 @@ There are four different types of Reddit searches: "arguments": { "type": "scrapeurls", "urls": [ - { - "url": "https://reddit.com/r/ArtificialIntelligence", - "method": "GET" - }, - { - "url": "https://reddit.com/u/TheTelegraph" - } + "https://reddit.com/r/ArtificialIntelligence/comments/1n1dwzv/what_math_should_i_focus_on_for_ai_and_why/" ], "sort": "new", "include_nsfw": true, diff --git a/api/types/reddit/reddit.go b/api/types/reddit/reddit.go index b8c4fcb8..5413ced8 100644 --- a/api/types/reddit/reddit.go +++ b/api/types/reddit/reddit.go @@ -4,47 +4,9 @@ import ( "encoding/json" "fmt" "time" - - "github.com/masa-finance/tee-types/pkg/util" -) - -type QueryType string - -const ( - ScrapeUrls QueryType = "scrapeurls" - SearchPosts QueryType = "searchposts" - SearchUsers QueryType = "searchusers" - SearchCommunities QueryType = "searchcommunities" -) - -var AllQueryTypes = util.NewSet(ScrapeUrls, SearchPosts, SearchUsers, SearchCommunities) - -type SortType string - -const ( - SortRelevance SortType = "relevance" - SortHot SortType = "hot" - SortTop SortType = "top" - SortNew SortType = "new" - SortRising SortType = "rising" - SortComments SortType = "comments" ) -var AllSortTypes = util.NewSet( - SortRelevance, - SortHot, - SortTop, - SortNew, - SortRising, - SortComments, -) - -// StartURL represents a single start URL for the Apify Reddit scraper. 
-type StartURL struct { - URL string `json:"url"` - Method string `json:"method"` -} - +// FIXME: These are duplicated here and in tee-types/types/reddit.go type ResponseType string const ( diff --git a/go.mod b/go.mod index a899daea..ac331a59 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/labstack/echo-contrib v0.17.4 github.com/labstack/echo/v4 v4.13.4 - github.com/masa-finance/tee-types v1.1.10 + github.com/masa-finance/tee-types v1.1.10 // FIXME: Update when https://github.com/masa-finance/tee-types/pull/19 is merged and tagged github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.38.0 github.com/sirupsen/logrus v1.9.3 From c072b34edd3c069dfbdb3658e2f53dd009109e66 Mon Sep 17 00:00:00 2001 From: mcamou Date: Wed, 27 Aug 2025 14:50:33 +0200 Subject: [PATCH 4/6] Convert urls to the shape expected by Apify --- go.mod | 5 ++++- go.sum | 4 ++-- internal/jobs/reddit.go | 10 +++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index ac331a59..765b72a3 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/labstack/echo-contrib v0.17.4 github.com/labstack/echo/v4 v4.13.4 - github.com/masa-finance/tee-types v1.1.10 // FIXME: Update when https://github.com/masa-finance/tee-types/pull/19 is merged and tagged + github.com/masa-finance/tee-types v1.1.10 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.38.0 github.com/sirupsen/logrus v1.9.3 @@ -21,6 +21,9 @@ require ( replace github.com/imperatrona/twitter-scraper => github.com/masa-finance/twitter-scraper v1.0.2 +// FIXME: rm when https://github.com/masa-finance/tee-types/pull/19 is merged and tagged, and the dependency above updated +replace github.com/masa-finance/tee-types => github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c + require ( github.com/AlexEidt/Vidio v1.5.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect diff --git a/go.sum b/go.sum index 5c3c5310..140074d4 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,8 @@ github.com/labstack/echo/v4 v4.13.4 h1:oTZZW+T3s9gAu5L8vmzihV7/lkXGZuITzTQkTEhcX github.com/labstack/echo/v4 v4.13.4/go.mod h1:g63b33BZ5vZzcIUF8AtRH40DrTlXnx4UMC8rBdndmjQ= github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= -github.com/masa-finance/tee-types v1.1.10 h1:mn/wF84Yg6tXH+JigRwluWaBwHT8SeNMzGVDgclC+08= -github.com/masa-finance/tee-types v1.1.10/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= +github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c h1:RV4GjjezlQaWyvu34ViNQRySNMHwpOMnqIeLatT+tF0= +github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= github.com/masa-finance/twitter-scraper v1.0.2 h1:him+wvYZHg/7EDdy73z1ceUywDJDRAhPLD2CSEa2Vfk= github.com/masa-finance/twitter-scraper v1.0.2/go.mod h1:38MY3g/h4V7Xl4HbW9lnkL8S3YiFZenBFv86hN57RG8= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= diff --git a/internal/jobs/reddit.go b/internal/jobs/reddit.go index 285c19f7..8dd58b1c 100644 --- a/internal/jobs/reddit.go +++ b/internal/jobs/reddit.go @@ -76,7 +76,15 @@ func (r *RedditScraper) ExecuteJob(j types.Job) (types.JobResult, error) { switch redditArgs.QueryType { case teetypes.RedditScrapeUrls: - resp, cursor, err := redditClient.ScrapeUrls(j.WorkerID, redditArgs.URLs, 
redditArgs.After, commonArgs, client.Cursor(redditArgs.NextCursor), redditArgs.MaxResults) + urls := make([]teetypes.RedditStartURL, 0, len(redditArgs.URLs)) + for _, u := range redditArgs.URLs { + urls = append(urls, teetypes.RedditStartURL{ + URL: u, + Method: "GET", + }) + } + + resp, cursor, err := redditClient.ScrapeUrls(j.WorkerID, urls, redditArgs.After, commonArgs, client.Cursor(redditArgs.NextCursor), redditArgs.MaxResults) return processRedditResponse(j, resp, cursor, err) case teetypes.RedditSearchUsers: From 48305fdebaeddb22f11769f731447fecdda95c91 Mon Sep 17 00:00:00 2001 From: mcamou Date: Wed, 27 Aug 2025 16:31:26 +0200 Subject: [PATCH 5/6] Fix test and update tee-types --- api/types/reddit/reddit.go | 1 - go.mod | 5 +---- go.sum | 4 ++-- internal/jobs/reddit_test.go | 2 +- internal/jobs/tiktok_transcription.go | 2 +- internal/jobs/twitter.go | 19 ++++++++----------- internal/jobs/webscraper.go | 2 +- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/api/types/reddit/reddit.go b/api/types/reddit/reddit.go index 5413ced8..568e6e80 100644 --- a/api/types/reddit/reddit.go +++ b/api/types/reddit/reddit.go @@ -6,7 +6,6 @@ import ( "time" ) -// FIXME: These are duplicated here and in tee-types/types/reddit.go type ResponseType string const ( diff --git a/go.mod b/go.mod index 765b72a3..9cd9facf 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/labstack/echo-contrib v0.17.4 github.com/labstack/echo/v4 v4.13.4 - github.com/masa-finance/tee-types v1.1.10 + github.com/masa-finance/tee-types v1.1.12 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.38.0 github.com/sirupsen/logrus v1.9.3 @@ -21,9 +21,6 @@ require ( replace github.com/imperatrona/twitter-scraper => github.com/masa-finance/twitter-scraper v1.0.2 -// FIXME: rm when https://github.com/masa-finance/tee-types/pull/19 is merged and tagged, and the dependency above updated -replace github.com/masa-finance/tee-types => github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c - require ( github.com/AlexEidt/Vidio v1.5.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect diff --git a/go.sum b/go.sum index 140074d4..137693b8 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,8 @@ github.com/labstack/echo/v4 v4.13.4 h1:oTZZW+T3s9gAu5L8vmzihV7/lkXGZuITzTQkTEhcX github.com/labstack/echo/v4 v4.13.4/go.mod h1:g63b33BZ5vZzcIUF8AtRH40DrTlXnx4UMC8rBdndmjQ= github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= -github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c h1:RV4GjjezlQaWyvu34ViNQRySNMHwpOMnqIeLatT+tF0= -github.com/masa-finance/tee-types v1.1.11-0.20250827124429-c0ceec1a743c/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= +github.com/masa-finance/tee-types v1.1.12 h1:SXBYJu76EM5JCujiso6ytBqEoYCvhDe1vvNFIXTmu6w= +github.com/masa-finance/tee-types v1.1.12/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= github.com/masa-finance/twitter-scraper v1.0.2 h1:him+wvYZHg/7EDdy73z1ceUywDJDRAhPLD2CSEa2Vfk= github.com/masa-finance/twitter-scraper v1.0.2/go.mod h1:38MY3g/h4V7Xl4HbW9lnkL8S3YiFZenBFv86hN57RG8= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= diff --git a/internal/jobs/reddit_test.go b/internal/jobs/reddit_test.go index 3b9349e1..f0396bd1 100644 --- a/internal/jobs/reddit_test.go +++ b/internal/jobs/reddit_test.go @@ -97,7 +97,7 @@ var _ = 
Describe("RedditScraper", func() { It("should call ScrapeUrls for the correct QueryType", func() { job.Arguments = map[string]any{ "type": teetypes.RedditScrapeUrls, - "urls": []teetypes.RedditStartURL{{URL: "https://www.reddit.com/u/zaphod/", Method: "GET"}}, + "urls": []string{"https://www.reddit.com/u/zaphod/"}, } mockClient.ScrapeUrlsFunc = func(urls []teetypes.RedditStartURL, after time.Time, cArgs redditapify.CommonArgs, cursor client.Cursor, maxResults uint) ([]*reddit.Response, client.Cursor, error) { diff --git a/internal/jobs/tiktok_transcription.go b/internal/jobs/tiktok_transcription.go index ba893a13..aebbde00 100644 --- a/internal/jobs/tiktok_transcription.go +++ b/internal/jobs/tiktok_transcription.go @@ -111,7 +111,7 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) { } // Type assert to TikTok arguments - tiktokArgs, ok := teeargs.AsTikTokArguments(jobArgs) + tiktokArgs, ok := jobArgs.(*teeargs.TikTokTranscriptionArguments) if !ok { return types.JobResult{Error: "invalid argument type for TikTok job"}, fmt.Errorf("invalid argument type") } diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index cfb59eab..8ffe6e71 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -1351,20 +1351,17 @@ func (ts *TwitterScraper) ExecuteJob(j types.Job) (types.JobResult, error) { } // Type assert to Twitter arguments - twitterArgs, ok := teeargs.AsTwitterArguments(jobArgs) + args, ok := jobArgs.(*teeargs.TwitterSearchArguments) if !ok { logrus.Errorf("Expected Twitter arguments for job ID %s, type %s", j.UUID, j.Type) return types.JobResult{Error: "invalid argument type for Twitter job"}, fmt.Errorf("invalid argument type") } // Log the capability for debugging - logrus.Debugf("Executing Twitter job ID %s with capability: %s", j.UUID, twitterArgs.GetCapability()) + logrus.Debugf("Executing Twitter job ID %s with capability: %s", j.UUID, args.GetCapability()) strategy := getScrapeStrategy(j.Type) - // Convert to concrete type for direct usage - args := twitterArgs.(*teeargs.TwitterSearchArguments) - jobResult, err := strategy.Execute(j, ts, args) if err != nil { logrus.Errorf("Error executing job ID %s, type %s: %v", j.UUID, j.Type, err) @@ -1378,37 +1375,37 @@ func (ts *TwitterScraper) ExecuteJob(j types.Job) (types.JobResult, error) { } switch { - case twitterArgs.IsSingleTweetOperation(): + case args.IsSingleTweetOperation(): var result *teetypes.TweetResult if err := jobResult.Unmarshal(&result); err != nil { logrus.Errorf("Error while unmarshalling single tweet result for job ID %s, type %s: %v", j.UUID, j.Type, err) return types.JobResult{Error: "error unmarshalling single tweet result for final validation"}, err } - case twitterArgs.IsMultipleTweetOperation(): + case args.IsMultipleTweetOperation(): var results []*teetypes.TweetResult if err := jobResult.Unmarshal(&results); err != nil { logrus.Errorf("Error while unmarshalling multiple tweet result for job ID %s, type %s: %v", j.UUID, j.Type, err) return types.JobResult{Error: "error unmarshalling multiple tweet result for final validation"}, err } - case twitterArgs.IsSingleProfileOperation(): + case args.IsSingleProfileOperation(): var result *twitterscraper.Profile if err := jobResult.Unmarshal(&result); err != nil { logrus.Errorf("Error while unmarshalling single profile result for job ID %s, type %s: %v", j.UUID, j.Type, err) return types.JobResult{Error: "error unmarshalling single profile result for final validation"}, err } - case 
twitterArgs.IsMultipleProfileOperation(): + case args.IsMultipleProfileOperation(): var results []*twitterscraper.Profile if err := jobResult.Unmarshal(&results); err != nil { logrus.Errorf("Error while unmarshalling multiple profile result for job ID %s, type %s: %v", j.UUID, j.Type, err) return types.JobResult{Error: "error unmarshalling multiple profile result for final validation"}, err } - case twitterArgs.IsSingleSpaceOperation(): + case args.IsSingleSpaceOperation(): var result *twitterscraper.Space if err := jobResult.Unmarshal(&result); err != nil { logrus.Errorf("Error while unmarshalling single space result for job ID %s, type %s: %v", j.UUID, j.Type, err) return types.JobResult{Error: "error unmarshalling single space result for final validation"}, err } - case twitterArgs.IsTrendsOperation(): + case args.IsTrendsOperation(): var results []string if err := jobResult.Unmarshal(&results); err != nil { logrus.Errorf("Error while unmarshalling trends result for job ID %s, type %s: %v", j.UUID, j.Type, err) diff --git a/internal/jobs/webscraper.go b/internal/jobs/webscraper.go index 7e4d652c..a90fe55d 100644 --- a/internal/jobs/webscraper.go +++ b/internal/jobs/webscraper.go @@ -55,7 +55,7 @@ func (ws *WebScraper) ExecuteJob(j types.Job) (types.JobResult, error) { } // Type assert to Web arguments - args, ok := teeargs.AsWebArguments(jobArgs) + args, ok := jobArgs.(*teeargs.WebSearchArguments) if !ok { logrus.Errorf("Expected Web arguments for job ID %s, type %s", j.UUID, j.Type) return types.JobResult{Error: "invalid argument type for Web job"}, nil From 1e7e831d039ccee1b308545140095e10f4cbfa55 Mon Sep 17 00:00:00 2001 From: mcamou Date: Wed, 27 Aug 2025 16:39:39 +0200 Subject: [PATCH 6/6] Fix test --- internal/jobs/reddit_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/jobs/reddit_test.go b/internal/jobs/reddit_test.go index f0396bd1..3fd639e8 100644 --- a/internal/jobs/reddit_test.go +++ b/internal/jobs/reddit_test.go @@ -95,14 +95,17 @@ var _ = Describe("RedditScraper", func() { }) It("should call ScrapeUrls for the correct QueryType", func() { + testUrls := []string{ + "https://www.reddit.com/r/HHGTTG/comments/1jynlrz/the_entire_series_after_restaurant_at_the_end_of/", + } job.Arguments = map[string]any{ "type": teetypes.RedditScrapeUrls, - "urls": []string{"https://www.reddit.com/u/zaphod/"}, + "urls": testUrls, } mockClient.ScrapeUrlsFunc = func(urls []teetypes.RedditStartURL, after time.Time, cArgs redditapify.CommonArgs, cursor client.Cursor, maxResults uint) ([]*reddit.Response, client.Cursor, error) { Expect(urls).To(HaveLen(1)) - Expect(urls[0].URL).To(Equal("https://www.reddit.com/u/zaphod/")) + Expect(urls[0].URL).To(Equal(testUrls[0])) return []*reddit.Response{{TypeSwitch: &reddit.TypeSwitch{Type: reddit.UserResponse}, User: &reddit.User{ID: "user1", DataType: string(reddit.UserResponse)}}}, "next", nil }
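
For reference, below is a minimal, self-contained Go sketch of the capability-aggregation flow that PATCH 1/6 sets up: every registered worker now satisfies the `worker` interface's `GetStructuredCapabilities`, and the jobserver deduplicates the reported capabilities by job type. The `JobType`, `Capability`, and `WorkerCapabilities` aliases and the `redditWorker` stub are simplified stand-ins for the `teetypes` definitions and `RedditScraper` referenced in the diffs above, not code from this PR.

package main

import (
	"fmt"
	"sort"
)

// Simplified stand-ins for the teetypes definitions referenced in the diffs above.
type (
	JobType            string
	Capability         string
	WorkerCapabilities map[JobType][]Capability
)

// worker mirrors the interface extended in PATCH 1/6: capability reporting is part
// of the worker interface itself, so the CapabilityProvider assertion goes away.
type worker interface {
	GetStructuredCapabilities() WorkerCapabilities
}

// redditWorker is a hypothetical stand-in for RedditScraper: it only advertises
// capabilities when an Apify API key is configured.
type redditWorker struct{ apifyApiKey string }

func (rs redditWorker) GetStructuredCapabilities() WorkerCapabilities {
	caps := make(WorkerCapabilities)
	if rs.apifyApiKey != "" {
		caps["reddit"] = []Capability{"scrapeurls", "searchposts", "searchusers", "searchcommunities"}
	}
	return caps
}

// aggregate deduplicates capabilities by job type, as GetWorkerCapabilities does.
func aggregate(workers []worker) WorkerCapabilities {
	jobTypeCapMap := make(map[JobType]map[Capability]struct{})
	for _, w := range workers {
		for jobType, capabilities := range w.GetStructuredCapabilities() {
			if _, exists := jobTypeCapMap[jobType]; !exists {
				jobTypeCapMap[jobType] = make(map[Capability]struct{})
			}
			for _, capability := range capabilities {
				jobTypeCapMap[jobType][capability] = struct{}{}
			}
		}
	}
	result := make(WorkerCapabilities, len(jobTypeCapMap))
	for jobType, capSet := range jobTypeCapMap {
		caps := make([]Capability, 0, len(capSet))
		for c := range capSet {
			caps = append(caps, c)
		}
		sort.Slice(caps, func(i, j int) bool { return caps[i] < caps[j] })
		result[jobType] = caps
	}
	return result
}

func main() {
	workers := []worker{
		redditWorker{apifyApiKey: "apify_api_example"}, // hypothetical key
		redditWorker{},                                 // no key: reports nothing
	}
	// Duplicate capabilities collapse into one sorted list per job type.
	fmt.Println(aggregate(workers))
}

With this shape, a worker that lacks its API key simply reports an empty map and contributes nothing to the aggregate, which is why the Reddit capabilities only appear when `ApifyApiKey` is set.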