diff --git a/args/llm.go b/args/llm.go
new file mode 100644
index 0000000..1816094
--- /dev/null
+++ b/args/llm.go
@@ -0,0 +1,81 @@
+package args
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	teetypes "github.com/masa-finance/tee-types/types"
+)
+
+var (
+	ErrLLMDatasetIdRequired = errors.New("dataset id is required")
+	ErrLLMPromptRequired    = errors.New("prompt is required")
+	ErrLLMMaxTokensNegative = errors.New("max tokens must be non-negative")
+)
+
+const (
+	LLMDefaultMaxTokens       = 300
+	LLMDefaultTemperature     = "0.1"
+	LLMDefaultMultipleColumns = false
+	LLMDefaultModel           = "gemini-1.5-flash-8b"
+)
+
+type LLMProcessorArguments struct {
+	DatasetId   string `json:"dataset_id"`
+	Prompt      string `json:"prompt"`
+	MaxTokens   int    `json:"max_tokens"`
+	Temperature string `json:"temperature"`
+}
+
+// UnmarshalJSON implements custom JSON unmarshaling with validation
+func (l *LLMProcessorArguments) UnmarshalJSON(data []byte) error {
+	// Prevent infinite recursion (you call json.Unmarshal which then calls `UnmarshalJSON`, which then calls `json.Unmarshal`...)
+	type Alias LLMProcessorArguments
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(l),
+	}
+
+	if err := json.Unmarshal(data, aux); err != nil {
+		return fmt.Errorf("failed to unmarshal llm arguments: %w", err)
+	}
+
+	l.setDefaultValues()
+
+	return l.Validate()
+}
+
+func (l *LLMProcessorArguments) setDefaultValues() {
+	if l.MaxTokens == 0 {
+		l.MaxTokens = LLMDefaultMaxTokens
+	}
+	if l.Temperature == "" {
+		l.Temperature = LLMDefaultTemperature
+	}
+}
+
+func (l *LLMProcessorArguments) Validate() error {
+	if l.DatasetId == "" {
+		return ErrLLMDatasetIdRequired
+	}
+	if l.Prompt == "" {
+		return ErrLLMPromptRequired
+	}
+	if l.MaxTokens < 0 {
+		return fmt.Errorf("%w: got %v", ErrLLMMaxTokensNegative, l.MaxTokens)
+	}
+	return nil
+}
+
+func (l LLMProcessorArguments) ToLLMProcessorRequest() teetypes.LLMProcessorRequest {
+	return teetypes.LLMProcessorRequest{
+		InputDatasetId:  l.DatasetId,
+		Prompt:          l.Prompt,
+		MaxTokens:       l.MaxTokens,
+		Temperature:     l.Temperature,
+		MultipleColumns: LLMDefaultMultipleColumns, // overrides default in actor API
+		Model:           LLMDefaultModel,           // overrides default in actor API
+	}
+}
diff --git a/args/llm_test.go b/args/llm_test.go
new file mode 100644
index 0000000..3884ebf
--- /dev/null
+++ b/args/llm_test.go
@@ -0,0 +1,136 @@
+package args_test
+
+import (
+	"encoding/json"
+	"errors"
+
+	. "github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega" + + "github.com/masa-finance/tee-types/args" +) + +var _ = Describe("LLMProcessorArguments", func() { + Describe("Marshalling and unmarshalling", func() { + It("should set default values", func() { + llmArgs := args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "summarize: ${markdown}", + } + jsonData, err := json.Marshal(llmArgs) + Expect(err).ToNot(HaveOccurred()) + err = json.Unmarshal([]byte(jsonData), &llmArgs) + Expect(err).ToNot(HaveOccurred()) + Expect(llmArgs.MaxTokens).To(Equal(300)) + Expect(llmArgs.Temperature).To(Equal("0.1")) + }) + + It("should override default values", func() { + llmArgs := args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "summarize: ${markdown}", + MaxTokens: 123, + Temperature: "0.7", + } + jsonData, err := json.Marshal(llmArgs) + Expect(err).ToNot(HaveOccurred()) + err = json.Unmarshal([]byte(jsonData), &llmArgs) + Expect(err).ToNot(HaveOccurred()) + Expect(llmArgs.MaxTokens).To(Equal(123)) + Expect(llmArgs.Temperature).To(Equal("0.7")) + }) + + It("should fail unmarshal when dataset_id is missing", func() { + var llmArgs args.LLMProcessorArguments + jsonData := []byte(`{"type":"datasetprocessor","prompt":"p"}`) + err := json.Unmarshal(jsonData, &llmArgs) + Expect(errors.Is(err, args.ErrLLMDatasetIdRequired)).To(BeTrue()) + }) + + It("should fail unmarshal when prompt is missing", func() { + var llmArgs args.LLMProcessorArguments + jsonData := []byte(`{"type":"datasetprocessor","dataset_id":"ds1"}`) + err := json.Unmarshal(jsonData, &llmArgs) + Expect(errors.Is(err, args.ErrLLMPromptRequired)).To(BeTrue()) + }) + }) + + Describe("Validation", func() { + It("should succeed with valid arguments", func() { + llmArgs := &args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "p", + MaxTokens: 10, + Temperature: "0.2", + } + err := llmArgs.Validate() + Expect(err).ToNot(HaveOccurred()) + }) + + It("should fail when dataset_id is missing", func() { + llmArgs := &args.LLMProcessorArguments{ + Prompt: "p", + MaxTokens: 10, + Temperature: "0.2", + } + err := llmArgs.Validate() + Expect(errors.Is(err, args.ErrLLMDatasetIdRequired)).To(BeTrue()) + }) + + It("should fail when prompt is missing", func() { + llmArgs := &args.LLMProcessorArguments{ + DatasetId: "ds1", + MaxTokens: 10, + Temperature: "0.2", + } + err := llmArgs.Validate() + Expect(errors.Is(err, args.ErrLLMPromptRequired)).To(BeTrue()) + }) + + It("should fail when max tokens is negative", func() { + llmArgs := &args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "p", + MaxTokens: -1, + Temperature: "0.2", + } + err := llmArgs.Validate() + Expect(errors.Is(err, args.ErrLLMMaxTokensNegative)).To(BeTrue()) + Expect(err.Error()).To(ContainSubstring("got -1")) + }) + }) + + Describe("ToLLMProcessorRequest", func() { + It("should map fields and defaults correctly", func() { + llmArgs := args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "p", + MaxTokens: 0, // default applied in To* + Temperature: "", + } + req := llmArgs.ToLLMProcessorRequest() + Expect(req.InputDatasetId).To(Equal("ds1")) + Expect(req.Prompt).To(Equal("p")) + Expect(req.MaxTokens).To(Equal(0)) + Expect(req.Temperature).To(Equal("")) + Expect(req.MultipleColumns).To(BeFalse()) + Expect(req.Model).To(Equal("gemini-1.5-flash-8b")) + }) + + It("should map fields correctly when set", func() { + llmArgs := args.LLMProcessorArguments{ + DatasetId: "ds1", + Prompt: "p", + MaxTokens: 42, + Temperature: "0.7", + } + req := llmArgs.ToLLMProcessorRequest() + 
+			Expect(req.InputDatasetId).To(Equal("ds1"))
+			Expect(req.Prompt).To(Equal("p"))
+			Expect(req.MaxTokens).To(Equal(0))
+			Expect(req.Temperature).To(Equal(""))
+			Expect(req.MultipleColumns).To(BeFalse())
+			Expect(req.Model).To(Equal("gemini-1.5-flash-8b"))
+		})
+
+		It("should map fields correctly when set", func() {
+			llmArgs := args.LLMProcessorArguments{
+				DatasetId:   "ds1",
+				Prompt:      "p",
+				MaxTokens:   42,
+				Temperature: "0.7",
+			}
+			req := llmArgs.ToLLMProcessorRequest()
+			Expect(req.InputDatasetId).To(Equal("ds1"))
+			Expect(req.Prompt).To(Equal("p"))
+			Expect(req.MaxTokens).To(Equal(42))
+			Expect(req.Temperature).To(Equal("0.7"))
+			Expect(req.MultipleColumns).To(BeFalse())
+			Expect(req.Model).To(Equal("gemini-1.5-flash-8b"))
+		})
+	})
+})
diff --git a/args/unmarshaller.go b/args/unmarshaller.go
index ce6bb49..1d3c26d 100644
--- a/args/unmarshaller.go
+++ b/args/unmarshaller.go
@@ -10,52 +10,9 @@ import (
 
 // JobArguments defines the interface that all job arguments must implement
 type JobArguments interface {
-	Validate() error
 	GetCapability() types.Capability
 }
 
-// TwitterJobArguments extends JobArguments for Twitter-specific methods
-type TwitterJobArguments interface {
-	JobArguments
-	ValidateForJobType(jobType types.JobType) error
-	IsSingleTweetOperation() bool
-	IsMultipleTweetOperation() bool
-	IsSingleProfileOperation() bool
-	IsMultipleProfileOperation() bool
-	IsSingleSpaceOperation() bool
-	IsTrendsOperation() bool
-}
-
-// WebJobArguments extends JobArguments for Web-specific methods
-type WebJobArguments interface {
-	JobArguments
-	ValidateForJobType(jobType types.JobType) error
-	IsDeepScrape() bool
-	HasSelector() bool
-	GetEffectiveMaxDepth() int
-}
-
-// TikTokJobArguments extends JobArguments for TikTok-specific methods
-type TikTokJobArguments interface {
-	JobArguments
-	ValidateForJobType(jobType types.JobType) error
-	HasLanguagePreference() bool
-	GetVideoURL() string
-	GetLanguageCode() string
-}
-
-// LinkedInJobArguments extends JobArguments for LinkedIn-specific methods
-type LinkedInJobArguments interface {
-	JobArguments
-	ValidateForJobType(jobType types.JobType) error
-}
-
-// RedditJobArguments extends JobArguments for Reddit-specific methods
-type RedditJobArguments interface {
-	JobArguments
-	ValidateForJobType(jobType types.JobType) error
-}
-
 // UnmarshalJobArguments unmarshals job arguments from a generic map into the appropriate typed struct
 // This works with both tee-indexer and tee-worker JobArguments types
 func UnmarshalJobArguments(jobType types.JobType, args map[string]any) (JobArguments, error) {
@@ -84,8 +41,8 @@ func UnmarshalJobArguments(jobType types.JobType, args map[string]any) (JobArgum
 }
 
 // Helper functions for unmarshaling specific argument types
-func unmarshalWebArguments(args map[string]any) (*WebSearchArguments, error) {
-	webArgs := &WebSearchArguments{}
+func unmarshalWebArguments(args map[string]any) (*WebArguments, error) {
+	webArgs := &WebArguments{}
 	if err := unmarshalToStruct(args, webArgs); err != nil {
 		return nil, fmt.Errorf("failed to unmarshal web job arguments: %w", err)
 	}
diff --git a/args/unmarshaller_test.go b/args/unmarshaller_test.go
index 04e784f..4231cbd 100644
--- a/args/unmarshaller_test.go
+++ b/args/unmarshaller_test.go
@@ -14,15 +14,13 @@ var _ = Describe("Unmarshaller", func() {
 		It("should unmarshal the arguments correctly", func() {
 			argsMap := map[string]any{
 				"url":       "https://example.com",
-				"selector":  "h1",
 				"max_depth": 2,
 			}
 			jobArgs, err := args.UnmarshalJobArguments(types.WebJob, argsMap)
 			Expect(err).ToNot(HaveOccurred())
-			webArgs, ok := jobArgs.(*args.WebSearchArguments)
+			webArgs, ok := jobArgs.(*args.WebArguments)
 			Expect(ok).To(BeTrue())
 			Expect(webArgs.URL).To(Equal("https://example.com"))
-			Expect(webArgs.Selector).To(Equal("h1"))
 			Expect(webArgs.MaxDepth).To(Equal(2))
 		})
 	})
diff --git a/args/web.go b/args/web.go
index 33a466d..561aa59 100644
--- a/args/web.go
+++ b/args/web.go
@@ -2,24 +2,39 @@ package args
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/url"
 
"github.com/masa-finance/tee-types/pkg/util" teetypes "github.com/masa-finance/tee-types/types" ) -type WebSearchArguments struct { - URL string `json:"url"` - Selector string `json:"selector"` - Depth int `json:"depth"` - MaxDepth int `json:"max_depth"` +var ( + ErrWebURLRequired = errors.New("url is required") + ErrWebURLInvalid = errors.New("invalid URL format") + ErrWebURLSchemeMissing = errors.New("url must include a scheme (http:// or https://)") + ErrWebMaxDepth = errors.New("max depth must be non-negative") + ErrWebMaxPages = errors.New("max pages must be at least 1") +) + +const ( + WebDefaultMaxPages = 1 + WebDefaultMethod = "GET" + WebDefaultRespectRobotsTxtFile = false + WebDefaultSaveMarkdown = true +) + +type WebArguments struct { + QueryType teetypes.WebQueryType `json:"type"` + URL string `json:"url"` + MaxDepth int `json:"max_depth"` + MaxPages int `json:"max_pages"` } // UnmarshalJSON implements custom JSON unmarshaling with validation -func (w *WebSearchArguments) UnmarshalJSON(data []byte) error { +func (w *WebArguments) UnmarshalJSON(data []byte) error { // Prevent infinite recursion (you call json.Unmarshal which then calls `UnmarshalJSON`, which then calls `json.Unmarshal`...) - type Alias WebSearchArguments + type Alias WebArguments aux := &struct { *Alias }{ @@ -27,46 +42,50 @@ func (w *WebSearchArguments) UnmarshalJSON(data []byte) error { } if err := json.Unmarshal(data, aux); err != nil { - return fmt.Errorf("failed to unmarshal Web arguments: %w", err) + return fmt.Errorf("failed to unmarshal web arguments: %w", err) } + w.setDefaultValues() + return w.Validate() } +func (w *WebArguments) setDefaultValues() { + if w.MaxPages == 0 { + w.MaxPages = WebDefaultMaxPages + } +} + // Validate validates the Web arguments -func (w *WebSearchArguments) Validate() error { +func (w *WebArguments) Validate() error { if w.URL == "" { - return fmt.Errorf("url is required") + return ErrWebURLRequired } // Validate URL format parsedURL, err := url.Parse(w.URL) if err != nil { - return fmt.Errorf("invalid URL format: %w", err) + return fmt.Errorf("%w: %v", ErrWebURLInvalid, err) } // Ensure URL has a scheme if parsedURL.Scheme == "" { - return fmt.Errorf("URL must include a scheme (http:// or https://)") + return ErrWebURLSchemeMissing } if w.MaxDepth < 0 { - return fmt.Errorf("max_depth must be non-negative, got: %d", w.MaxDepth) + return fmt.Errorf("%w: got %v", ErrWebMaxDepth, w.MaxDepth) } - if w.Depth < 0 { - return fmt.Errorf("depth must be non-negative, got: %d", w.Depth) - } - - if w.Depth > w.MaxDepth && w.MaxDepth > 0 { - return fmt.Errorf("depth (%d) cannot exceed max_depth (%d)", w.Depth, w.MaxDepth) + if w.MaxPages < 1 { + return fmt.Errorf("%w: got %v", ErrWebMaxPages, w.MaxPages) } return nil } // ValidateForJobType validates Web arguments for a specific job type -func (w *WebSearchArguments) ValidateForJobType(jobType teetypes.JobType) error { +func (w *WebArguments) ValidateForJobType(jobType teetypes.JobType) error { if err := w.Validate(); err != nil { return err } @@ -76,21 +95,18 @@ func (w *WebSearchArguments) ValidateForJobType(jobType teetypes.JobType) error } // GetCapability returns the capability for web operations (always scraper) -func (w *WebSearchArguments) GetCapability() teetypes.Capability { +func (w *WebArguments) GetCapability() teetypes.Capability { return teetypes.CapScraper } -// IsDeepScrape returns true if this is a deep scraping operation -func (w *WebSearchArguments) IsDeepScrape() bool { - return w.MaxDepth > 1 || w.Depth > 0 -} - -// 
-// HasSelector returns true if a CSS selector is specified
-func (w *WebSearchArguments) HasSelector() bool {
-	return w.Selector != ""
-}
-
-// GetEffectiveMaxDepth returns the effective maximum depth for scraping
-func (w *WebSearchArguments) GetEffectiveMaxDepth() int {
-	return util.Max(w.MaxDepth, 1)
+func (w WebArguments) ToWebScraperRequest() teetypes.WebScraperRequest {
+	return teetypes.WebScraperRequest{
+		StartUrls: []teetypes.WebStartURL{
+			{URL: w.URL, Method: WebDefaultMethod},
+		},
+		MaxCrawlDepth:        w.MaxDepth,
+		MaxCrawlPages:        w.MaxPages,
+		RespectRobotsTxtFile: WebDefaultRespectRobotsTxtFile,
+		SaveMarkdown:         WebDefaultSaveMarkdown,
+	}
 }
diff --git a/args/web_test.go b/args/web_test.go
new file mode 100644
index 0000000..77e771f
--- /dev/null
+++ b/args/web_test.go
@@ -0,0 +1,158 @@
+package args_test
+
+import (
+	"encoding/json"
+	"errors"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/masa-finance/tee-types/args"
+	"github.com/masa-finance/tee-types/types"
+)
+
+var _ = Describe("WebArguments", func() {
+	Describe("Marshalling and unmarshalling", func() {
+		It("should set default values", func() {
+			webArgs := args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  0,
+				MaxPages:  0,
+			}
+			jsonData, err := json.Marshal(webArgs)
+			Expect(err).ToNot(HaveOccurred())
+			err = json.Unmarshal(jsonData, &webArgs)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(webArgs.MaxPages).To(Equal(1))
+		})
+
+		It("should override default values", func() {
+			webArgs := args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  2,
+				MaxPages:  5,
+			}
+			jsonData, err := json.Marshal(webArgs)
+			Expect(err).ToNot(HaveOccurred())
+			err = json.Unmarshal(jsonData, &webArgs)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(webArgs.MaxPages).To(Equal(5))
+		})
+
+		It("should fail unmarshal when url is missing", func() {
+			var webArgs args.WebArguments
+			jsonData := []byte(`{"type":"scraper","max_depth":1,"max_pages":1}`)
+			err := json.Unmarshal(jsonData, &webArgs)
+			Expect(errors.Is(err, args.ErrWebURLRequired)).To(BeTrue())
+		})
+	})
+
+	Describe("Validation", func() {
+		It("should succeed with valid arguments", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  2,
+				MaxPages:  3,
+			}
+			err := webArgs.Validate()
+			Expect(err).ToNot(HaveOccurred())
+		})
+
+		It("should fail when url is missing", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				MaxDepth:  0,
+				MaxPages:  1,
+			}
+			err := webArgs.Validate()
+			Expect(errors.Is(err, args.ErrWebURLRequired)).To(BeTrue())
+		})
+
+		It("should fail with an invalid URL format", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "http:// invalid.com",
+				MaxDepth:  0,
+				MaxPages:  1,
+			}
+			err := webArgs.Validate()
+			Expect(errors.Is(err, args.ErrWebURLInvalid)).To(BeTrue())
+			Expect(err.Error()).To(ContainSubstring("invalid URL format"))
+		})
+
+		It("should fail when scheme is missing", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "example.com",
+				MaxDepth:  0,
+				MaxPages:  1,
+			}
+			err := webArgs.Validate()
+			Expect(errors.Is(err, args.ErrWebURLSchemeMissing)).To(BeTrue())
+		})
+
+		It("should fail when max depth is negative", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  -1,
+				MaxPages:  1,
+			}
+			err := webArgs.Validate()
+			Expect(errors.Is(err, args.ErrWebMaxDepth)).To(BeTrue())
+			Expect(err.Error()).To(ContainSubstring("got -1"))
+		})
+
+		It("should fail when max pages is less than 1", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  0,
+				MaxPages:  0,
+			}
+			err := webArgs.Validate()
+			Expect(errors.Is(err, args.ErrWebMaxPages)).To(BeTrue())
+			Expect(err.Error()).To(ContainSubstring("got 0"))
+		})
+	})
+
+	Describe("Job capability", func() {
+		It("should return the scraper capability", func() {
+			webArgs := &args.WebArguments{}
+			Expect(webArgs.GetCapability()).To(Equal(types.CapScraper))
+		})
+
+		It("should validate capability for WebJob", func() {
+			webArgs := &args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  1,
+				MaxPages:  1,
+			}
+			err := webArgs.ValidateForJobType(types.WebJob)
+			Expect(err).ToNot(HaveOccurred())
+		})
+	})
+
+	Describe("ToWebScraperRequest", func() {
+		It("should map fields correctly", func() {
+			webArgs := args.WebArguments{
+				QueryType: types.WebScraper,
+				URL:       "https://example.com",
+				MaxDepth:  2,
+				MaxPages:  3,
+			}
+			req := webArgs.ToWebScraperRequest()
+			Expect(req.StartUrls).To(HaveLen(1))
+			Expect(req.StartUrls[0].URL).To(Equal("https://example.com"))
+			Expect(req.StartUrls[0].Method).To(Equal("GET"))
+			Expect(req.MaxCrawlDepth).To(Equal(2))
+			Expect(req.MaxCrawlPages).To(Equal(3))
+			Expect(req.RespectRobotsTxtFile).To(BeFalse())
+			Expect(req.SaveMarkdown).To(BeTrue())
+		})
+	})
+})
diff --git a/types/jobs.go b/types/jobs.go
index fe66292..1a7d46b 100644
--- a/types/jobs.go
+++ b/types/jobs.go
@@ -24,7 +24,7 @@ func (j JobType) ValidateCapability(capability Capability) error {
 	}
 
 	if !slices.Contains(validCaps, capability) {
-		return fmt.Errorf("capability '%s' is not valid for job type '%s'. Valid capabilities: %v",
+		return fmt.Errorf("capability '%s' is not valid for job type '%s'; valid capabilities: %v",
valid capabilities: %v", capability, j, validCaps) } @@ -74,7 +74,7 @@ const ( CapGetFollowing Capability = "getfollowing" CapGetFollowers Capability = "getfollowers" CapGetSpace Capability = "getspace" - CapGetProfile Capability = "getprofile" // LinkedIn get profile capability + CapGetProfile Capability = "getprofile" // Reddit capabilities CapScrapeUrls Capability = "scrapeurls" CapSearchPosts Capability = "searchposts" @@ -86,14 +86,12 @@ const ( // Capability group constants for easy reuse var ( - AlwaysAvailableWebCaps = []Capability{CapScraper, CapEmpty} AlwaysAvailableTelemetryCaps = []Capability{CapTelemetry, CapEmpty} AlwaysAvailableTiktokCaps = []Capability{CapTranscription, CapEmpty} AlwaysAvailableLinkedInCaps = []Capability{CapSearchByQuery, CapGetProfile, CapEmpty} // AlwaysAvailableCapabilities defines the job capabilities that are always available regardless of configuration AlwaysAvailableCapabilities = WorkerCapabilities{ - WebJob: AlwaysAvailableWebCaps, TelemetryJob: AlwaysAvailableTelemetryCaps, TiktokJob: AlwaysAvailableTiktokCaps, } @@ -118,6 +116,9 @@ var ( // RedditCaps are all the Reddit capabilities (only available with Apify) RedditCaps = []Capability{CapScrapeUrls, CapSearchPosts, CapSearchUsers, CapSearchCommunities} + + // WebCaps are all the Web capabilities (only available with Apify) + WebCaps = []Capability{CapScraper, CapEmpty} ) // JobCapabilityMap defines which capabilities are valid for each job type @@ -137,7 +138,7 @@ var JobCapabilityMap = map[JobType][]Capability{ TwitterApifyJob: TwitterApifyCaps, // Web job capabilities - WebJob: AlwaysAvailableWebCaps, + WebJob: WebCaps, // TikTok job capabilities TiktokJob: combineCapabilities( diff --git a/types/llm.go b/types/llm.go new file mode 100644 index 0000000..fb67693 --- /dev/null +++ b/types/llm.go @@ -0,0 +1,15 @@ +package types + +type LLMProcessorRequest struct { + InputDatasetId string `json:"inputDatasetId"` + LLMProviderApiKey string `json:"llmProviderApiKey"` // encrypted api key by miner + Model string `json:"model"` + MultipleColumns bool `json:"multipleColumns"` + Prompt string `json:"prompt"` // example: summarize the content of this webpage: ${markdown} + Temperature string `json:"temperature"` + MaxTokens int `json:"maxTokens"` +} + +type LLMProcessorResult struct { + LLMResponse string `json:"llmresponse"` +} diff --git a/types/web.go b/types/web.go new file mode 100644 index 0000000..dda1cea --- /dev/null +++ b/types/web.go @@ -0,0 +1,55 @@ +package types + +import ( + "time" +) + +// WebStartURL represents a single start URL configuration for web scraping +type WebStartURL struct { + URL string `json:"url"` + Method string `json:"method"` +} + +type WebQueryType string + +const ( + WebScraper WebQueryType = "scraper" +) + +// WebScraperRequest represents the customizable configuration for web scraping operations +type WebScraperRequest struct { + StartUrls []WebStartURL `json:"startUrls"` + MaxCrawlDepth int `json:"maxCrawlDepth"` + MaxCrawlPages int `json:"maxCrawlPages"` + RespectRobotsTxtFile bool `json:"respectRobotsTxtFile"` + SaveMarkdown bool `json:"saveMarkdown"` +} + +// WebCrawlInfo contains information about the crawling process +type WebCrawlInfo struct { + LoadedURL string `json:"loadedUrl"` + LoadedTime time.Time `json:"loadedTime"` + ReferrerURL string `json:"referrerUrl"` + Depth int `json:"depth"` + HTTPStatusCode int `json:"httpStatusCode"` +} + +// WebMetadata contains metadata extracted from the scraped page +type WebMetadata struct { + CanonicalURL string 
`json:"canonicalUrl"` + Title string `json:"title"` + Description *string `json:"description"` + Author *string `json:"author"` + Keywords *string `json:"keywords"` + LanguageCode *string `json:"languageCode"` +} + +// WebScraperResult represents the complete result from web scraping a single page +type WebScraperResult struct { + URL string `json:"url"` + Crawl WebCrawlInfo `json:"crawl"` + Metadata WebMetadata `json:"metadata"` + Text string `json:"text"` + Markdown string `json:"markdown"` + LLMResponse string `json:"llmresponse,omitempty"` // populated by LLM processor +}