Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/pyroscope/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -885,5 +885,15 @@ Usage of ./pyroscope:
Maximum length accepted for label names. (default 1024)
-validation.max-length-label-value int
Maximum length accepted for label value. This setting also applies to the metric name. (default 2048)
-validation.max-profile-size-bytes int
Maximum size of a profile in bytes. This is based off the uncompressed size. 0 to disable. (default 4194304)
-validation.max-profile-stacktrace-depth int
Maximum depth of a profile stacktrace. Profiles are not rejected instead stacktraces are truncated. 0 to disable. (default 1000)
-validation.max-profile-stacktrace-sample-labels int
Maximum number of labels in a profile sample. 0 to disable. (default 100)
-validation.max-profile-stacktrace-samples int
Maximum number of samples in a profile. 0 to disable. (default 4000)
-validation.max-profile-symbol-value-length int
Maximum length of a profile symbol value (labels, function names and filenames, etc...). Profiles are not rejected instead symbol values are truncated. 0 to disable. (default 1024)
-version
Show the version of phlare and exit
10 changes: 10 additions & 0 deletions cmd/pyroscope/help.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,16 @@ Usage of ./pyroscope:
Maximum length accepted for label names. (default 1024)
-validation.max-length-label-value int
Maximum length accepted for label value. This setting also applies to the metric name. (default 2048)
-validation.max-profile-size-bytes int
Maximum size of a profile in bytes. This is based off the uncompressed size. 0 to disable. (default 4194304)
-validation.max-profile-stacktrace-depth int
Maximum depth of a profile stacktrace. Profiles are not rejected instead stacktraces are truncated. 0 to disable. (default 1000)
-validation.max-profile-stacktrace-sample-labels int
Maximum number of labels in a profile sample. 0 to disable. (default 100)
-validation.max-profile-stacktrace-samples int
Maximum number of samples in a profile. 0 to disable. (default 4000)
-validation.max-profile-symbol-value-length int
Maximum length of a profile symbol value (labels, function names and filenames, etc...). Profiles are not rejected instead symbol values are truncated. 0 to disable. (default 1024)
-version
Show the version of phlare and exit

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,30 @@ limits:
# CLI flag: -validation.max-label-names-per-series
[max_label_names_per_series: <int> | default = 30]

# Maximum size of a profile in bytes. This is based off the uncompressed size.
# 0 to disable.
# CLI flag: -validation.max-profile-size-bytes
[max_profile_size_bytes: <int> | default = 4194304]

# Maximum number of samples in a profile. 0 to disable.
# CLI flag: -validation.max-profile-stacktrace-samples
[max_profile_stacktrace_samples: <int> | default = 4000]

# Maximum number of labels in a profile sample. 0 to disable.
# CLI flag: -validation.max-profile-stacktrace-sample-labels
[max_profile_stacktrace_sample_labels: <int> | default = 100]

# Maximum depth of a profile stacktrace. Profiles are not rejected instead
# stacktraces are truncated. 0 to disable.
# CLI flag: -validation.max-profile-stacktrace-depth
[max_profile_stacktrace_depth: <int> | default = 1000]

# Maximum length of a profile symbol value (labels, function names and
# filenames, etc...). Profiles are not rejected instead symbol values are
# truncated. 0 to disable.
# CLI flag: -validation.max-profile-symbol-value-length
[max_profile_symbol_value_length: <int> | default = 1024]

# The tenant's shard size used by shuffle-sharding. Must be set both on
# ingesters and distributors. 0 disables shuffle sharding.
# CLI flag: -distributor.ingestion-tenant-shard-size
Expand Down
13 changes: 13 additions & 0 deletions pkg/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ type Limits interface {
MaxLabelNameLength(tenantID string) int
MaxLabelValueLength(tenantID string) int
MaxLabelNamesPerSeries(tenantID string) int
MaxProfileSizeBytes(userID string) int
MaxProfileStacktraceSamples(userID string) int
MaxProfileStacktraceSampleLabels(userID string) int
MaxProfileStacktraceDepth(userID string) int
MaxProfileSymbolValueLength(userID string) int
}

func New(cfg Config, ingestersRing ring.ReadRing, factory ring_client.PoolFactory, limits Limits, reg prometheus.Registerer, logger log.Logger, clientsOptions ...connect.ClientOption) (*Distributor, error) {
Expand Down Expand Up @@ -212,6 +217,14 @@ func (d *Distributor) Push(ctx context.Context, req *connect.Request[pushv1.Push
d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(p.SizeBytes()))
d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample)))
totalPushUncompressedBytes += int64(p.SizeBytes())

if err := validation.ValidateProfile(d.limits, tenantID, p.Profile, p.SizeBytes(), phlaremodel.Labels(series.Labels)); err != nil {
validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), tenantID).Add(float64(totalProfiles))
validation.DiscardedBytes.WithLabelValues(string(validation.ReasonOf(err)), tenantID).Add(float64(totalPushUncompressedBytes))
p.Close()
return nil, connect.NewError(connect.CodeInvalidArgument, err)
}

p.Normalize()
symbolsSize, samplesSize := profileSizeBytes(p.Profile)
d.metrics.receivedSamplesBytes.WithLabelValues(profName, tenantID).Observe(float64(samplesSize))
Expand Down
37 changes: 37 additions & 0 deletions pkg/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ type Limits struct {
MaxLabelValueLength int `yaml:"max_label_value_length" json:"max_label_value_length"`
MaxLabelNamesPerSeries int `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`

MaxProfileSizeBytes int `yaml:"max_profile_size_bytes" json:"max_profile_size_bytes"`
MaxProfileStacktraceSamples int `yaml:"max_profile_stacktrace_samples" json:"max_profile_stacktrace_samples"`
MaxProfileStacktraceSampleLabels int `yaml:"max_profile_stacktrace_sample_labels" json:"max_profile_stacktrace_sample_labels"`
MaxProfileStacktraceDepth int `yaml:"max_profile_stacktrace_depth" json:"max_profile_stacktrace_depth"`
MaxProfileSymbolValueLength int `yaml:"max_profile_symbol_value_length" json:"max_profile_symbol_value_length"`

// The tenant shard size determines the how many ingesters a particular
// tenant will be sharded to. Needs to be specified on distributors for
// correct distribution and on ingesters so that the local ingestion limit
Expand Down Expand Up @@ -81,6 +87,12 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.Var(&l.QuerySplitDuration, "querier.split-queries-by-interval", "Split queries by a time interval and execute in parallel. The value 0 disables splitting by time")

f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 0, "Maximum number of queries that will be scheduled in parallel by the frontend.")

f.IntVar(&l.MaxProfileSizeBytes, "validation.max-profile-size-bytes", 4*1024*1024, "Maximum size of a profile in bytes. This is based off the uncompressed size. 0 to disable.")
f.IntVar(&l.MaxProfileStacktraceSamples, "validation.max-profile-stacktrace-samples", 4000, "Maximum number of samples in a profile. 0 to disable.")
f.IntVar(&l.MaxProfileStacktraceSampleLabels, "validation.max-profile-stacktrace-sample-labels", 100, "Maximum number of labels in a profile sample. 0 to disable.")
f.IntVar(&l.MaxProfileStacktraceDepth, "validation.max-profile-stacktrace-depth", 1000, "Maximum depth of a profile stacktrace. Profiles are not rejected instead stacktraces are truncated. 0 to disable.")
f.IntVar(&l.MaxProfileSymbolValueLength, "validation.max-profile-symbol-value-length", 1024, "Maximum length of a profile symbol value (labels, function names and filenames, etc...). Profiles are not rejected instead symbol values are truncated. 0 to disable.")
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.
Expand Down Expand Up @@ -182,6 +194,31 @@ func (o *Overrides) MaxLabelNamesPerSeries(tenantID string) int {
return o.getOverridesForTenant(tenantID).MaxLabelNamesPerSeries
}

// MaxProfileSizeBytes reports the profile size limit, in bytes, that applies
// to the given tenant.
func (o *Overrides) MaxProfileSizeBytes(tenantID string) int {
	tenantLimits := o.getOverridesForTenant(tenantID)
	return tenantLimits.MaxProfileSizeBytes
}

// MaxProfileStacktraceSamples reports how many samples a single profile may
// carry for the given tenant.
func (o *Overrides) MaxProfileStacktraceSamples(tenantID string) int {
	tenantLimits := o.getOverridesForTenant(tenantID)
	return tenantLimits.MaxProfileStacktraceSamples
}

// MaxProfileStacktraceSampleLabels reports how many labels a single profile
// sample may carry for the given tenant.
func (o *Overrides) MaxProfileStacktraceSampleLabels(tenantID string) int {
	tenantLimits := o.getOverridesForTenant(tenantID)
	return tenantLimits.MaxProfileStacktraceSampleLabels
}

// MaxProfileStacktraceDepth reports the deepest stacktrace allowed in a
// profile for the given tenant.
func (o *Overrides) MaxProfileStacktraceDepth(tenantID string) int {
	tenantLimits := o.getOverridesForTenant(tenantID)
	return tenantLimits.MaxProfileStacktraceDepth
}

// MaxProfileSymbolValueLength reports the longest profile symbol value
// (labels, function names, filenames, etc.) allowed for the given tenant.
func (o *Overrides) MaxProfileSymbolValueLength(tenantID string) int {
	tenantLimits := o.getOverridesForTenant(tenantID)
	return tenantLimits.MaxProfileSymbolValueLength
}

// MaxLocalSeriesPerTenant returns the maximum number of series a tenant is allowed to store
// in a single ingester.
func (o *Overrides) MaxLocalSeriesPerTenant(tenantID string) int {
Expand Down
22 changes: 22 additions & 0 deletions pkg/validation/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ type MockLimits struct {
MaxLabelNameLengthValue int
MaxLabelValueLengthValue int
MaxLabelNamesPerSeriesValue int

MaxProfileSizeBytesValue int
MaxProfileStacktraceSamplesValue int
MaxProfileStacktraceDepthValue int
MaxProfileStacktraceSampleLabelsValue int
MaxProfileSymbolValueLengthValue int
}

func (m MockLimits) QuerySplitDuration(string) time.Duration { return m.QuerySplitDurationValue }
Expand All @@ -19,3 +25,19 @@ func (m MockLimits) MaxQueryLookback(tenantID string) time.Duration { return m.M
func (m MockLimits) MaxLabelNameLength(userID string) int { return m.MaxLabelNameLengthValue }
func (m MockLimits) MaxLabelValueLength(userID string) int { return m.MaxLabelValueLengthValue }
func (m MockLimits) MaxLabelNamesPerSeries(userID string) int { return m.MaxLabelNamesPerSeriesValue }
// MaxProfileSizeBytes returns MaxProfileSizeBytesValue regardless of the tenant.
func (m MockLimits) MaxProfileSizeBytes(string) int {
	return m.MaxProfileSizeBytesValue
}
// MaxProfileStacktraceSamples returns MaxProfileStacktraceSamplesValue
// regardless of the tenant.
func (m MockLimits) MaxProfileStacktraceSamples(string) int { return m.MaxProfileStacktraceSamplesValue }

// MaxProfileStacktraceDepth returns MaxProfileStacktraceDepthValue regardless
// of the tenant.
func (m MockLimits) MaxProfileStacktraceDepth(string) int { return m.MaxProfileStacktraceDepthValue }

// MaxProfileStacktraceSampleLabels returns
// MaxProfileStacktraceSampleLabelsValue regardless of the tenant.
func (m MockLimits) MaxProfileStacktraceSampleLabels(string) int {
	limit := m.MaxProfileStacktraceSampleLabelsValue
	return limit
}

// MaxProfileSymbolValueLength returns MaxProfileSymbolValueLengthValue
// regardless of the tenant.
func (m MockLimits) MaxProfileSymbolValueLength(string) int { return m.MaxProfileSymbolValueLengthValue }
50 changes: 48 additions & 2 deletions pkg/validation/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/common/model"

googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
phlaremodel "github.com/grafana/pyroscope/pkg/model"
"github.com/grafana/pyroscope/pkg/util"
Expand Down Expand Up @@ -42,8 +43,9 @@ const (
DuplicateLabelNames Reason = "duplicate_label_names"
// SeriesLimit is a reason for discarding lines when we can't create a new stream
// because the limit of active streams has been reached.
SeriesLimit Reason = "series_limit"
QueryLimit Reason = "query_limit"
SeriesLimit Reason = "series_limit"
QueryLimit Reason = "query_limit"
InvalidProfile Reason = "invalid_profile"

SeriesLimitErrorMsg = "Maximum active series limit exceeded (%d/%d), reduce the number of active streams (reduce labels or reduce label values), or contact your administrator to see if the limit can be increased"
MissingLabelsErrorMsg = "error at least one label pair is required per profile"
Expand All @@ -53,6 +55,9 @@ const (
LabelValueTooLongErrorMsg = "profile with labels '%s' has label value too long: '%s'"
DuplicateLabelNamesErrorMsg = "profile with labels '%s' has duplicate label name: '%s'"
QueryTooLongErrorMsg = "the query time range exceeds the limit (query length: %s, limit: %s)"
ProfileTooBigErrorMsg = "the profile with labels '%s' size exceeds the limit (profile size: %d, limit: %d)"
ProfileTooManySamplesErrorMsg = "the profile with labels '%s' size exceeds the samples limit (actual: %d, limit: %d)"
ProfileTooManyLabelsErrorMsg = "the profile with labels '%s' size exceeds the sample labels limit (actual: %d, limit: %d)"
)

var (
Expand Down Expand Up @@ -122,6 +127,47 @@ func ValidateLabels(limits LabelValidationLimits, userID string, ls []*typesv1.L
return nil
}

// ProfileValidationLimits is the set of per-tenant limits that
// ValidateProfile consults. ValidateProfile treats a returned value of 0 as
// "limit disabled" for every method.
type ProfileValidationLimits interface {
	// MaxProfileSizeBytes is the maximum uncompressed size of a profile, in
	// bytes. Profiles above it are rejected.
	MaxProfileSizeBytes(userID string) int
	// MaxProfileStacktraceSamples is the maximum number of samples in a
	// profile. Profiles above it are rejected.
	MaxProfileStacktraceSamples(userID string) int
	// MaxProfileStacktraceSampleLabels is the maximum number of labels on a
	// single sample. Profiles with a sample above it are rejected.
	MaxProfileStacktraceSampleLabels(userID string) int
	// MaxProfileStacktraceDepth is the maximum number of locations kept per
	// sample. Deeper stacktraces are truncated, not rejected.
	MaxProfileStacktraceDepth(userID string) int
	// MaxProfileSymbolValueLength is the maximum length of a string-table
	// entry. Longer entries are truncated, not rejected.
	MaxProfileSymbolValueLength(userID string) int
}

func ValidateProfile(limits ProfileValidationLimits, userID string, prof *googlev1.Profile, uncompressedSize int, ls phlaremodel.Labels) error {
if prof == nil {
return nil
}
if limit := limits.MaxProfileSizeBytes(userID); limit != 0 && uncompressedSize > limit {
return NewErrorf(InvalidProfile, ProfileTooBigErrorMsg, phlaremodel.LabelPairsString(ls), uncompressedSize, limit)
}
if limit, size := limits.MaxProfileStacktraceSamples(userID), len(prof.Sample); limit != 0 && size > limit {
return NewErrorf(InvalidProfile, ProfileTooManySamplesErrorMsg, phlaremodel.LabelPairsString(ls), size, limit)
}
var (
depthLimit = limits.MaxProfileStacktraceDepth(userID)
labelsLimit = limits.MaxProfileStacktraceSampleLabels(userID)
symbolLengthLimit = limits.MaxProfileSymbolValueLength(userID)
)
for _, s := range prof.Sample {

if depthLimit != 0 && len(s.LocationId) > depthLimit {
// truncate the deepest frames
s.LocationId = s.LocationId[:depthLimit]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just have realised that this truncates parent locations... This may make the flamegraph unreadable. I guess we could achieve better results with cutting the stack tip:

s.LocationId = s.LocationId[len(s.LocationId)-depthLimit:]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that wasn't intentional, let's fix it.

}
Comment on lines +155 to +158
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have a stub like "other" – I think it could be a separate PR

if labelsLimit != 0 && len(s.Label) > labelsLimit {
return NewErrorf(InvalidProfile, ProfileTooManyLabelsErrorMsg, phlaremodel.LabelPairsString(ls), len(s.Label), labelsLimit)
}
}
for i := range prof.StringTable {
if symbolLengthLimit != 0 && len(prof.StringTable[i]) > symbolLengthLimit {
prof.StringTable[i] = prof.StringTable[i][len(prof.StringTable[i])-symbolLengthLimit:]
}
}
return nil
}

// isValidServiceName reports whether serviceNameValue is non-empty.
func isValidServiceName(serviceNameValue string) bool {
	return len(serviceNameValue) > 0
}
Expand Down
99 changes: 97 additions & 2 deletions pkg/validation/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ import (
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"

phlaremodel "github.com/grafana/pyroscope/pkg/model"

googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
phlaremodel "github.com/grafana/pyroscope/pkg/model"
)

func TestValidateLabels(t *testing.T) {
Expand Down Expand Up @@ -199,3 +199,98 @@ func Test_ValidateRangeRequest(t *testing.T) {
})
}
}

// TestValidateProfile runs ValidateProfile over a table of cases: a nil
// profile, each rejection reason (size, sample count, per-sample label count),
// and the in-place truncation of stacktraces and string-table entries. Every
// case is validated for tenant "foo" with the label set {foo="bar"}.
func TestValidateProfile(t *testing.T) {
	type testCase struct {
		name        string
		profile     *googlev1.Profile
		size        int
		limits      ProfileValidationLimits
		expectedErr error
		// assert, when set, inspects the profile after validation.
		assert func(t *testing.T, profile *googlev1.Profile)
	}

	cases := []testCase{
		{
			name:    "nil profile",
			profile: nil,
			limits:  MockLimits{},
		},
		{
			name:    "too big",
			profile: &googlev1.Profile{},
			size:    3,
			limits: MockLimits{
				MaxProfileSizeBytesValue: 1,
			},
			expectedErr: NewErrorf(InvalidProfile, ProfileTooBigErrorMsg, `{foo="bar"}`, 3, 1),
		},
		{
			name: "too many samples",
			profile: &googlev1.Profile{
				Sample: make([]*googlev1.Sample, 3),
			},
			limits: MockLimits{
				MaxProfileStacktraceSamplesValue: 2,
			},
			expectedErr: NewErrorf(InvalidProfile, ProfileTooManySamplesErrorMsg, `{foo="bar"}`, 3, 2),
		},
		{
			name: "too many labels",
			profile: &googlev1.Profile{
				Sample: []*googlev1.Sample{
					{
						Label: make([]*googlev1.Label, 3),
					},
				},
			},
			limits: MockLimits{
				MaxProfileStacktraceSampleLabelsValue: 2,
			},
			expectedErr: NewErrorf(InvalidProfile, ProfileTooManyLabelsErrorMsg, `{foo="bar"}`, 3, 2),
		},
		{
			name: "truncate labels and stacktrace",
			profile: &googlev1.Profile{
				StringTable: []string{"foo", "/foo/bar"},
				Sample: []*googlev1.Sample{
					{
						LocationId: []uint64{0, 1, 2, 3, 4, 5},
					},
				},
			},
			limits: MockLimits{
				MaxProfileStacktraceDepthValue:   2,
				MaxProfileSymbolValueLengthValue: 3,
			},
			assert: func(t *testing.T, profile *googlev1.Profile) {
				t.Helper()
				// Symbols keep their trailing bytes: "/foo/bar" becomes "bar".
				require.Equal(t, []string{"foo", "bar"}, profile.StringTable)
				// NOTE(review): this expects the first two location IDs to be
				// kept. If ValidateProfile is changed to keep the tail of
				// LocationId instead, the expectation becomes []uint64{4, 5}.
				require.Equal(t, []uint64{0, 1}, profile.Sample[0].LocationId)
			},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			err := ValidateProfile(tc.limits, "foo", tc.profile, tc.size, phlaremodel.LabelsFromStrings("foo", "bar"))
			if tc.expectedErr == nil {
				require.NoError(t, err)
			} else {
				require.Error(t, err)
				require.Equal(t, tc.expectedErr, err)
			}

			if tc.assert != nil {
				tc.assert(t, tc.profile)
			}
		})
	}
}