ENG-50887: Mask value using a masking config by siddhant2001 · Pull Request #227 · hypertrace/query-service

siddhant2001 · 2024-09-27T07:55:39Z

Write tests and verify correctness

github-actions · 2024-09-27T08:02:40Z

Test Results

330 tests +5 330 ✅ +5 11s ⏱️ ±0s
42 suites ±0 0 💤 ±0
42 files ±0 0 ❌ ±0

Results for commit d38e08c. ± Comparison against base commit e092fbf.

This pull request removes 72 and adds 18 tests. Note that renamed tests count towards both.

            long: 3600000
            valueType: LONG
          columnName: "SERVICE.startTime"
          long: 1570658506605
          long: 1727521612551
          long: 1727521614904
          long: 1727521615242
          long: 1727521633079
          long: 1727525212711
          long: 1727525214904
…

org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [1] filter {
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "EVENT.spanTags"
      }
    }
    operator: CONTAINS_KEY
    rhs {
      literal {
        value {
          string: "span.kind"
        }
      }
    }
  }
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "EVENT.spanTags"
        subpath: "span.kind"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          string: "client"
        }
      }
    }
  }
}
selection {
  at…, 10, server
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [1] filter {
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "API_TRACE.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556715813
        }
      }
    }
  }
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "API_TRACE.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560315969
        }
      }
    }
  }
  …
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [1] filter {
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "BACKEND.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556718115
        }
      }
    }
  }
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "BACKEND.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560318115
        }
      }
    }
  }
  chil…
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [1] filter {
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "SERVICE.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556718415
        }
      }
    }
  }
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "SERVICE.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560318415
        }
      }
    }
  }
  chil…
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [2] filter {
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "API_TRACE.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556715813
        }
      }
    }
  }
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "API_TRACE.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560315969
        }
      }
  …
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [2] filter {
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "BACKEND.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556718115
        }
      }
    }
  }
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "BACKEND.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560318115
        }
      }
    }
…
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [2] filter {
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "EVENT.startTime"
      }
    }
    operator: GT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1570658506605
        }
      }
    }
  }
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "EVENT.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 2570744906673
        }
      }
    }
  }
…, 2, server
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [2] filter {
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "SERVICE.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556718415
        }
      }
    }
  }
  childFilter {
    lhs {
      attribute_expression {
        attributeId: "SERVICE.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560318415
        }
      }
    }
…
org.hypertrace.core.query.service.htqueries.HTPinotQueriesTest ‑ [3] selection {
  function {
    functionName: "sum"
    arguments {
      attribute_expression {
        attributeId: "EVENT.spanTags"
        subpath: "otel.status_code"
      }
    }
  }
}
, 1, 0.0
org.hypertrace.core.query.service.htqueries.HTPostgresQueriesTest ‑ [1] filter {
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "SERVICE.startTime"
      }
    }
    operator: GE
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728556736851
        }
      }
    }
  }
  childFilter {
    lhs {
      columnIdentifier {
        columnName: "SERVICE.startTime"
      }
    }
    operator: LT
    rhs {
      literal {
        value {
          valueType: LONG
          long: 1728560336851
        }
      }
    }
  }
  chil…
…

♻️ This comment has been updated with latest results.

kotharironak · 2024-09-30T08:20:24Z

query-service/src/main/resources/configs/common/application.conf

+        tenantScopedMaskingCriteria = [
+          {
+            "tenantId": "testTenant",
+            "timeRangeAndMaskValues": [


So, we can have multiple TimeRange conditions, right?

Yes, each timeRange condition will have its own masking values.

kotharironak · 2024-09-30T08:22:27Z

query-service/src/main/resources/configs/common/application.conf

 #             ]
 #            }
 #         ]
+        tenantScopedMaskingCriteria = [


Let's comment it out in the main application.conf file here. By default, no masking should be applied.

Yes done, you've reviewed an older commit

kotharironak · 2024-10-01T08:58:41Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+public class HandlerScopedMaskingConfig {
+  private static final String TENANT_SCOPED_MASKS_CONFIG_KEY = "tenantScopedMaskingCriteria";
+  private final Map<String, List<MaskValuesForTimeRange>> tenantToMaskValuesMap;
+  private HashMap<String, Boolean> shouldMaskAttribute = new HashMap<>();


What is key in both these maps - shouldMaskAttribute and maskedValue?

AttributeId is the key to both.
Removing shouldMaskAttribute as it is not needed.

kotharironak · 2024-10-01T09:04:53Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+
+  private static boolean isTimeRangeOverlap(
+      MaskValuesForTimeRange timeRangeAndMasks, Instant queryStartTime, Instant queryEndTime) {
+    boolean timeRangeOverlap = true;


Default should be false, right?

The following conditionals check for no overlap, i.e. they set the timeRangeOverlap to false. This statement is correct.

kotharironak · 2024-10-01T09:13:36Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+        timeRangeOverlap = false;
+      }
+
+      Instant endTimeInstant = Instant.ofEpochMilli(timeRangeAndMasks.getStartTimeMillis().get());


should be getEndTimeMillis?

I guess this is the function we are looking for?

if (timeRangeAndMasks.getStartTimeMillis().isPresent()) {
  Instant startTimeInstant = Instant.ofEpochMilli(timeRangeAndMasks.getStartTimeMillis().get());
  Instant endTimeInstant = Instant.ofEpochMilli(timeRangeAndMasks.getEndTimeMillis().get());
  if (!(startTimeInstant.isAfter(queryEndTime) || endTimeInstant.isBefore(queryStartTime))) {
    return true;
  }
}
return false;

Right, fixed the condition.

kotharironak · 2024-10-01T09:31:23Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+  }
+
+  public void parseColumns(ExecutionContext executionContext) {
+    shouldMaskAttribute.clear();


It seems like the state is maintained per request, but we should only test against the timeRange condition.

Pre-compute using config:

tenantId -> List of timeRanges

timeRange -> set of attributes

timeRange -> map(attributeId, maskValue)

During response processing:

Check if any time range matches.

Pick the first match (or should we apply UNION?).

If UNION is used, and the same attribute is present in two time ranges with different mask values, which one should we consider? I guess any one should be fine.

This is how I've done it.
If an attribute appears in multiple time ranges, I pick any one of its mask values.

kotharironak · 2024-10-01T09:42:04Z

...ice-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotBasedRequestHandler.java

+
    return Observable.fromIterable(rowBuilderList)
        .map(Builder::build)
+        //        .map(row -> handlerScopedMaskingConfig.mask(row))


Can we remove this?

kotharironak · 2024-10-07T05:18:18Z

query-service/src/main/resources/configs/common/application.conf

+#             "tenantId": "testTenant",
+#             "timeRangeAndMaskValues": [
+#               {
+#                 "startTimeMillis": 0,


I think we should treat the timestamps as mandatory.

If startTime or endTime is missing -> log a warning stating that the filter will be ignored.

kotharironak · 2024-10-08T03:34:03Z

...-service-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotResultAnalyzer.java

+    if (indexToLogicalName.containsKey(colIdx)) {
+      return indexToLogicalName.get(colIdx);
+    }
+


nit: remove additional space.

kotharironak · 2024-10-08T03:34:28Z

...-service-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotResultAnalyzer.java

    return result;
  }
+
+  String getLogicalNameFromColIdx(Integer colIdx) {


nit: Let's see if we can use Optional as the return type.

kotharironak · 2024-10-08T03:37:00Z

...ice-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotBasedRequestHandler.java


+  private static final String MASKED_VALUE = "*";
+  // This is how empty list is represented in Pinot
+  private static final String PINOT_EMPTY_LIST = "[\"\"]";


PINOT_EMPTY_LIST -> ARRAY_TYPE_MASKED_VALUE
MASKED_VALUE -> DEFAULT_MASKED_VALUE

anujgoyal1 · 2024-10-08T05:32:03Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+  public List<String> getMaskedAttributes(ExecutionContext executionContext) {
+    String tenantId = executionContext.getTenantId();
+    List<String> maskedAttributes = new ArrayList<>();
+    //    maskedValue.clear();


please remove this.

anujgoyal1 · 2024-10-08T05:54:38Z

...service-impl/src/main/java/org/hypertrace/core/query/service/HandlerScopedMaskingConfig.java

+      this.tenantId = config.getString(TENANT_ID_CONFIG_KEY);
+      this.maskValues =
+          config.getConfigList(TIME_RANGE_AND_MASK_VALUES_CONFIG_KEY).stream()
+              .map(MaskValuesForTimeRange::new)


filter out the empty maskings

anujgoyal1 · 2024-10-08T05:59:07Z

...ice-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotBasedRequestHandler.java

          // to retrieve data
-          String colVal = resultAnalyzer.getDataFromRow(rowId, logicalName);
+          String colVal =
+              !maskedAttributes.contains(logicalName)


nit : get rid of the ! and invert to simplify

…ry-service into mask-unbofuscated-cookies

kotharironak · 2024-10-10T06:22:55Z

...ice-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotBasedRequestHandler.java


-  Observable<Row> convert(ResultSetGroup resultSetGroup, LinkedHashSet<String> selectedAttributes) {
+  Observable<Row> convert(ResultSetGroup resultSetGroup, ExecutionContext executionContext) {
+    LinkedHashSet<String> selectedAttributes = executionContext.getSelectedColumns();


can you move this also inside resultSetGroup.getResultSetCount() > 0?

kotharironak · 2024-10-10T06:24:56Z

...ice-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotBasedRequestHandler.java

-        for (int colIdx = 0; colIdx < resultSet.getColumnCount(); colIdx++) {
+        for (int colIdx = 0, logicalColIdx = 0;
+            colIdx < resultSet.getColumnCount();
+            colIdx++, logicalColIdx++) {


Why do we need logicalColIdx?

There can be multiple map fields. When creating the indexToLogicalName map, a map field only increments the counter by one, whereas here each map field increments colIdx by 2. That's why I created a new variable, which is incremented only once even when we pass over a map's two columns (key and value).

kotharironak · 2024-10-10T06:26:18Z

...-service-impl/src/main/java/org/hypertrace/core/query/service/pinot/PinotResultAnalyzer.java

  }
+
+  Optional<String> getLogicalNameFromColIdx(Integer colIdx) {
+    return Optional.ofNullable(indexToLogicalName.get(colIdx));


In what scenario can it be null?

You're right, it shouldn't be

kotharironak

LGTM

create masking config

46b1921

siddhant2001 requested a review from a team as a code owner September 27, 2024 07:55

siddhant2001 requested review from kotharironak, mihirgt and satish-mittal September 27, 2024 07:56

siddhant2001 added 3 commits September 30, 2024 17:26

WIP: time range filter

5e424ae

working

4f178ae

fix app.conf

a2a381c

kotharironak reviewed Oct 1, 2024

View reviewed changes

siddhant2001 added 5 commits October 1, 2024 16:59

comments

e167411

remove comment

1167c62

remove state

3201839

stateless changes

22a7c05

spotless

e033b5b

kotharironak reviewed Oct 8, 2024

View reviewed changes

comments

0421c5d

anujgoyal1 reviewed Oct 8, 2024

View reviewed changes

siddhant2001 added 4 commits October 8, 2024 12:24

comments

345b5c5

tests

4559457

logical col idx

e89416f

Merge branch 'main' into mask-unbofuscated-cookies

858fbbc

siddhant2001 requested review from anujgoyal1 and kotharironak October 8, 2024 09:18

siddhant2001 added 3 commits October 8, 2024 14:50

application.conf comments

19e4834

remove stale comments

a055b31

Merge branch 'mask-unbofuscated-cookies' of github.com:hypertrace/que…

398ac00

…ry-service into mask-unbofuscated-cookies

kotharironak reviewed Oct 10, 2024

View reviewed changes

comments

fdfcfcb

siddhant2001 added 9 commits October 10, 2024 14:37

changes

8fd154f

change config

041e520

cleanup tests

916076e

change

7b5e7ca

refactor

dc54e02

remove extra line

612f3bd

refactor

02d2d72

refactor

4ebe89c

list to set

5375d85

kotharironak self-requested a review October 10, 2024 11:28

kotharironak previously approved these changes Oct 10, 2024

View reviewed changes

vuln fix

d38e08c

siddhant2001 dismissed kotharironak’s stale review via d38e08c October 10, 2024 11:32

kotharironak approved these changes Oct 10, 2024

View reviewed changes

siddhant2001 merged commit cd60ebb into main Oct 10, 2024

siddhant2001 deleted the mask-unbofuscated-cookies branch October 10, 2024 15:09

Conversation

siddhant2001 commented Sep 27, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Sep 27, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Test Results

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

kotharironak left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

siddhant2001 commented Sep 27, 2024 •

edited

Loading

github-actions bot commented Sep 27, 2024 •

edited

Loading