Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG egover=1.7.2
ARG egover=1.8.0
ARG baseimage=ghcr.io/edgelesssys/ego-deploy:v${egover}
ARG VERSION

Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ test-jobs: docker-build-test
test-twitter: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/twitter_test.go ./internal/jobs/jobs_suite_test.go

test-linkedin: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/linkedin_test.go ./internal/jobs/jobs_suite_test.go

test-tiktok: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/tiktok_test.go ./internal/jobs/jobs_suite_test.go

Expand Down
37 changes: 20 additions & 17 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,47 +1,50 @@
module github.com/masa-finance/tee-worker

go 1.23.0
go 1.24.0

toolchain go1.24.3
toolchain go1.24.6

require (
github.com/edgelesssys/ego v1.7.2
github.com/edgelesssys/ego v1.8.0
github.com/google/uuid v1.6.0
github.com/imperatrona/twitter-scraper v0.0.18
github.com/joho/godotenv v1.5.1
github.com/labstack/echo-contrib v0.17.4
github.com/labstack/echo/v4 v4.13.4
github.com/masa-finance/tee-types v1.1.17
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.38.0
github.com/masa-finance/tee-types v1.2.0
github.com/onsi/ginkgo/v2 v2.26.0
github.com/onsi/gomega v1.38.2
github.com/sirupsen/logrus v1.9.3
)

replace github.com/imperatrona/twitter-scraper => github.com/masa-finance/twitter-scraper v1.0.2

require (
github.com/AlexEidt/Vidio v1.5.1 // indirect
github.com/Masterminds/semver/v3 v3.4.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/mod v0.28.0 // indirect
golang.org/x/sync v0.17.0 // indirect
)

require (
github.com/go-jose/go-jose/v4 v4.1.2 // indirect
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 // indirect
github.com/google/pprof v0.0.0-20251007162407-5df77e3f7d1d // indirect
github.com/labstack/gommon v0.4.2
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.2 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792
golang.org/x/net v0.43.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/time v0.12.0 // indirect
golang.org/x/tools v0.35.0 // indirect
google.golang.org/protobuf v1.36.7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
golang.org/x/crypto v0.43.0 // indirect
golang.org/x/exp v0.0.0-20251002181428-27f1f14c8bb9
golang.org/x/net v0.46.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/time v0.14.0 // indirect
golang.org/x/tools v0.37.0 // indirect
google.golang.org/protobuf v1.36.10 // indirect
)
96 changes: 64 additions & 32 deletions go.sum

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions internal/apify/actors.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ type actorIds struct {
LLMDatasetProcessor ActorId
TwitterFollowers ActorId
WebScraper ActorId
LinkedInSearchProfile ActorId
}

var ActorIds = actorIds{
Expand All @@ -20,6 +21,7 @@ var ActorIds = actorIds{
LLMDatasetProcessor: "dusan.vystrcil~llm-dataset-processor",
TwitterFollowers: "kaitoeasyapi~premium-x-follower-scraper-following-data",
WebScraper: "apify~website-content-crawler",
LinkedInSearchProfile: "harvestapi~linkedin-profile-search",
}

type defaultActorInput map[string]any
Expand Down Expand Up @@ -63,4 +65,10 @@ var Actors = []ActorConfig{
Capabilities: teetypes.WebCaps,
JobType: teetypes.WebJob,
},
{
ActorId: ActorIds.LinkedInSearchProfile,
DefaultInput: defaultActorInput{},
Capabilities: teetypes.LinkedInCaps,
JobType: teetypes.LinkedInJob,
},
}
108 changes: 108 additions & 0 deletions internal/jobs/linkedin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package jobs

import (
"encoding/json"
"errors"
"fmt"

"github.com/sirupsen/logrus"

"github.com/masa-finance/tee-worker/api/types"
"github.com/masa-finance/tee-worker/internal/config"
"github.com/masa-finance/tee-worker/internal/jobs/linkedinapify"
"github.com/masa-finance/tee-worker/internal/jobs/stats"
"github.com/masa-finance/tee-worker/pkg/client"

teeargs "github.com/masa-finance/tee-types/args"
profileArgs "github.com/masa-finance/tee-types/args/linkedin/profile"
teetypes "github.com/masa-finance/tee-types/types"
profileTypes "github.com/masa-finance/tee-types/types/linkedin/profile"
)

// LinkedInApifyClient defines the interface for the LinkedIn Apify client to allow mocking in tests
type LinkedInApifyClient interface {
SearchProfiles(workerID string, args *profileArgs.Arguments, cursor client.Cursor) ([]*profileTypes.Profile, string, client.Cursor, error)
ValidateApiKey() error
}

// NewLinkedInApifyClient is a function variable that can be replaced in tests.
// It defaults to the actual implementation.
var NewLinkedInApifyClient = func(apiKey string, statsCollector *stats.StatsCollector) (LinkedInApifyClient, error) {
return linkedinapify.NewClient(apiKey, statsCollector)
}

type LinkedInScraper struct {
configuration config.JobConfiguration
statsCollector *stats.StatsCollector
capabilities []teetypes.Capability
}

func NewLinkedInScraper(jc config.JobConfiguration, statsCollector *stats.StatsCollector) *LinkedInScraper {
logrus.Info("LinkedIn scraper via Apify initialized")
return &LinkedInScraper{
configuration: jc,
statsCollector: statsCollector,
capabilities: teetypes.LinkedInCaps,
}
}

func (ls *LinkedInScraper) ExecuteJob(j types.Job) (types.JobResult, error) {
logrus.WithField("job_uuid", j.UUID).Info("Starting ExecuteJob for LinkedIn profile search")

// Require Apify key for LinkedIn scraping
apifyApiKey := ls.configuration.GetString("apify_api_key", "")
if apifyApiKey == "" {
msg := errors.New("Apify API key is required for LinkedIn job")
return types.JobResult{Error: msg.Error()}, msg
}

jobArgs, err := teeargs.UnmarshalJobArguments(teetypes.JobType(j.Type), map[string]any(j.Arguments))
if err != nil {
msg := fmt.Errorf("failed to unmarshal job arguments: %w", err)
return types.JobResult{Error: msg.Error()}, msg
}

linkedinArgs, ok := jobArgs.(*profileArgs.Arguments)
if !ok {
return types.JobResult{Error: "invalid argument type for LinkedIn job"}, errors.New("invalid argument type")
}
logrus.Debugf("LinkedIn job args: %+v", *linkedinArgs)

linkedinClient, err := NewLinkedInApifyClient(apifyApiKey, ls.statsCollector)
if err != nil {
return types.JobResult{Error: "error while creating LinkedIn Apify client"}, fmt.Errorf("error creating LinkedIn Apify client: %w", err)
}

profiles, datasetId, cursor, err := linkedinClient.SearchProfiles(j.WorkerID, linkedinArgs, client.EmptyCursor)
if err != nil {
return types.JobResult{Error: fmt.Sprintf("error while searching LinkedIn profiles: %s", err.Error())}, fmt.Errorf("error searching LinkedIn profiles: %w", err)
}

if datasetId == "" {
return types.JobResult{Error: "missing dataset id from LinkedIn profile search"}, errors.New("missing dataset id from LinkedIn profile search")
}

data, err := json.Marshal(profiles)
if err != nil {
return types.JobResult{Error: fmt.Sprintf("error marshalling LinkedIn response")}, fmt.Errorf("error marshalling LinkedIn response: %w", err)
}

return types.JobResult{
Data: data,
Job: j,
NextCursor: cursor.String(),
}, nil
}

// GetStructuredCapabilities returns the structured capabilities supported by the LinkedIn scraper
// based on the available credentials and API keys
func (ls *LinkedInScraper) GetStructuredCapabilities() teetypes.WorkerCapabilities {
capabilities := make(teetypes.WorkerCapabilities)

apifyApiKey := ls.configuration.GetString("apify_api_key", "")
if apifyApiKey != "" {
capabilities[teetypes.LinkedInJob] = teetypes.LinkedInCaps
}

return capabilities
}
Loading
Loading