Refactor selection set implementation to be immutable (#2387)

The current (before this commit) implementation of `SelectionSet` is inherently mutable: its `SelectionSet.add` method mutate the selection set it's called on. Which is at odds with the general query planner algorithm, which is the main user of `SelectionSet`, but is fundamentally based on dealing with immutable data (as we explore various "path" options and the possible plans, most things are shared (at least partly)). This mismatch between the query planner that really want immutable selection sets but an implementation that is mutable leads to less than optimal code complexity and performance. More specificially, if we want to use a `SelectionSet` into 2 separate branch of the algorithm, we usually have to do defensive deep copies, which is inefficient. And to mitigate those inefficiencies, we sometimes don't do those copies, but this lead to code that fragile and easy to break and has lead to bug previously (where we save a copy, but then a branch mistakenly mutated and thus impacted another branch mistakenly). A few tricks had been added to the implementation to help mitigate the risks (the `freeze` behaviour, and the copy-on-write in `FetchGroup`), but they add complexity and aren't always optimal performance wise. This commit thus rework the implementation/api of `SelectionSet` to make it an immutable data structure. Doing so makes the code a lot safer and easy to reason about. And it also provide some performance benefits: compared to current `main` over a set of production schema/query, this seem to provide around 20% improvement on average and up to almost 50% improvement on some specific cases (disclaimer: those benchmark are fairly unscientific so those numbers should be taken with a grain of salt, but the numbers are clear enough that this is a net measurable gain).
apollographql · Mar 15, 2023 · 260c357 · 260c357
1 parent 01b9c31
commit 260c357
Show file tree

Hide file tree

Showing 16 changed files with 1,636 additions and 1,414 deletions.
diff --git a/.changeset/curvy-balloons-flash.md b/.changeset/curvy-balloons-flash.md
@@ -0,0 +1,11 @@
+---
+"@apollo/composition": patch
+"@apollo/gateway": patch
+"@apollo/federation-internals": patch
+"@apollo/query-graphs": patch
+"@apollo/query-planner": patch
+---
+
+Refactor the internal implementation of selection sets used by the query planner to decrease the code complexity and
+improve query plan generation performance in many cases.
+
diff --git a/composition-js/src/validate.ts b/composition-js/src/validate.ts
@@ -28,9 +28,11 @@ import {
   selectionOfElement,
   SelectionSet,
   SubgraphASTNode,
+  selectionSetOf,
   typenameFieldName,
   validateSupergraph,
-  VariableDefinitions
+  VariableDefinitions,
+  isOutputType
 } from "@apollo/federation-internals";
 import {
   Edge,
@@ -209,7 +211,8 @@ function buildWitnessNextStep(edges: Edge[], index: number): SelectionSet | unde
     // ellipsis instead make it immediately clear after which part of the query there is an issue.
     const lastType = edges[edges.length -1].tail.type;
     // Note that vertex types are named type and output ones, so if it's not a leaf it is guaranteed to be selectable.
-    return isLeafType(lastType) ? undefined : new SelectionSet(lastType as CompositeType);
+    assert(isOutputType(lastType), 'Should not have input types as vertex types');
+    return isLeafType(lastType) ? undefined : new SelectionSet(lastType);
   }
 
   const edge = edges[index];
@@ -235,17 +238,19 @@ function buildWitnessNextStep(edges: Edge[], index: number): SelectionSet | unde
       assert(false, `Invalid edge ${edge} found in supergraph path`);
   }
   // If we get here, the edge is either a downcast or a field, so the edge head must be selectable.
-  const selectionSet = new SelectionSet(edge.head.type as CompositeType);
-  selectionSet.add(selection);
-  return selectionSet;
+  return selectionSetOf(edge.head.type as CompositeType, selection);
 }
 
 function buildWitnessField(definition: FieldDefinition<any>): Field {
+  if (definition.arguments().length === 0) {
+    return new Field(definition);
+  }
+
   const args = Object.create(null);
   for (const argDef of definition.arguments()) {
     args[argDef.name] = generateWitnessValue(argDef.type!);
   }
-  return new Field(definition, args, new VariableDefinitions());
+  return new Field(definition, args);
 }
 
 function generateWitnessValue(type: InputType): any {
@@ -743,7 +748,7 @@ class ConditionValidationResolver {
       const pathsOptions = advanceSimultaneousPathsWithOperation(
         this.supergraphSchema,
         paths,
-        state.selection.element(),
+        state.selection.element,
       );
       if (!pathsOptions) {
         continue;

diff --git a/gateway-js/src/executeQueryPlan.ts b/gateway-js/src/executeQueryPlan.ts
@@ -189,7 +189,7 @@ export async function executeQueryPlan(
             operation,
             variables: requestContext.request.variables,
             input: unfilteredData,
-            introspectionHandling: (f) => executeIntrospection(operationContext.schema, f.expandFragments().toSelectionNode()),
+            introspectionHandling: (f) => executeIntrospection(operationContext.schema, f.expandAllFragments().toSelectionNode()),
           }));
 
           // If we have errors during the post-processing, we ignore them if any other errors have been thrown during

diff --git a/gateway-js/src/resultShaping.ts b/gateway-js/src/resultShaping.ts
@@ -161,12 +161,12 @@ function applySelectionSet({
   parentType: CompositeType,
 }): ApplyResult {
   for (const selection of selectionSet.selections()) {
-    if (shouldSkip(selection.element(), parameters)) {
+    if (shouldSkip(selection.element, parameters)) {
       continue;
     }
 
     if (selection.kind === 'FieldSelection') {
-      const field = selection.element();
+      const field = selection.element;
       const fieldType = field.definition.type!;
       const responseName = field.responseName();
       const outputValue = output[responseName];
@@ -241,7 +241,7 @@ function applySelectionSet({
         return ApplyResult.NULL_BUBBLE_UP;
       }
     } else {
-      const fragment = selection.element();
+      const fragment = selection.element;
       const typename = input[typenameFieldName];
       assert(!typename || typeof typename === 'string', () => `Got unexpected value for __typename: ${typename}`);
       if (typeConditionApplies(parameters.schema, fragment.typeCondition, typename, parentType)) {

diff --git a/internals-js/package.json b/internals-js/package.json
@@ -24,7 +24,9 @@
   },
   "dependencies": {
     "chalk": "^4.1.0",
-    "js-levenshtein": "^1.1.6"
+    "js-levenshtein": "^1.1.6",
+    "@types/uuid": "^8.3.4",
+    "uuid": "^9.0.0"
   },
   "publishConfig": {
     "access": "public"

diff --git a/internals-js/src/__tests__/operations.test.ts b/internals-js/src/__tests__/operations.test.ts
@@ -4,7 +4,7 @@ import {
   SchemaRootKind,
 } from '../../dist/definitions';
 import { buildSchema } from '../../dist/buildSchema';
-import { Field, FieldSelection, Operation, operationFromDocument, parseOperation, SelectionSet } from '../../dist/operations';
+import { MutableSelectionSet, Operation, operationFromDocument, parseOperation } from '../../dist/operations';
 import './matchers';
 import { DocumentNode, FieldNode, GraphQLError, Kind, OperationDefinitionNode, OperationTypeNode, SelectionNode, SelectionSetNode } from 'graphql';
 
@@ -242,124 +242,6 @@ describe('fragments optimization', () => {
   });
 });
 
-describe('selection set freezing', () => {
-  const schema = parseSchema(`
-    type Query {
-      t: T
-    }
-
-    type T {
-      a: Int
-      b: Int
-    }
-  `);
-
-  const tField = schema.schemaDefinition.rootType('query')!.field('t')!;
-
-  it('throws if one tries to modify a frozen selection set', () => {
-    // Note that we use parseOperation to help us build selection/selection sets because it's more readable/convenient
-    // thant to build the object "programmatically".
-    const s1 = parseOperation(schema, `{ t { a } }`).selectionSet;
-    const s2 = parseOperation(schema, `{ t { b } }`).selectionSet;
-
-    const s = new SelectionSet(schema.schemaDefinition.rootType('query')!);
-
-    // Control: check we can add to the selection set while not yet frozen
-    expect(s.isFrozen()).toBeFalsy();
-    expect(() => s.mergeIn(s1)).not.toThrow();
-
-    s.freeze();
-    expect(s.isFrozen()).toBeTruthy();
-    expect(() => s.mergeIn(s2)).toThrowError('Cannot add to frozen selection: { t { a } }');
-  });
-
-  it('it does not clone non-frozen selections when adding to another one', () => {
-    // This test is a bit debatable because what it tests is not so much a behaviour
-    // we *need* absolutely to preserve, but rather test how things happens to
-    // behave currently and illustrate the current contrast between frozen and
-    // non-frozen selection set.
-    // That is, this test show what happens if the test
-    //   "it automaticaly clones frozen selections when adding to another one"
-    // is done without freezing.
-
-    const s1 = parseOperation(schema, `{ t { a } }`).selectionSet;
-    const s2 = parseOperation(schema, `{ t { b } }`).selectionSet;
-    const s = new SelectionSet(schema.schemaDefinition.rootType('query')!);
-
-    s.mergeIn(s1);
-    s.mergeIn(s2);
-
-    expect(s.toString()).toBe('{ t { a b } }');
-
-    // This next assertion is where differs from the case where `s1` is frozen. Namely,
-    // when we do `s.mergeIn(s1)`, then `s` directly references `s1` without cloning
-    // and thus the next modification (`s.mergeIn(s2)`) ends up modifying both `s` and `s1`.
-    // Note that we don't mean by this test that the fact that `s.mergeIn(s1)` does
-    // not clone `s1` is a behaviour one should *rely* on, but it currently done for
-    // efficiencies sake: query planning does a lot of selection set building through
-    // `SelectionSet::mergeIn` and `SelectionSet::add` and we often pass to those method
-    // newly constructed selections as input, so cloning them would wast CPU and early
-    // query planning benchmarking showed that this could add up on the more expansive
-    // plan computations. This is why freezing exists: it allows us to save cloning
-    // in general, but to protect those selection set we know should be immutable
-    // so they do get cloned in such situation.
-    expect(s1.toString()).toBe('{ t { a b } }');
-    expect(s2.toString()).toBe('{ t { b } }');
-  });
-
-  it('it automaticaly clones frozen field selections when merging to another one', () => {
-    const s1 = parseOperation(schema, `{ t { a } }`).selectionSet.freeze();
-    const s2 = parseOperation(schema, `{ t { b } }`).selectionSet.freeze();
-    const s = new SelectionSet(schema.schemaDefinition.rootType('query')!);
-
-    s.mergeIn(s1);
-    s.mergeIn(s2);
-
-    // We check S is what we expect...
-    expect(s.toString()).toBe('{ t { a b } }');
-
-    // ... but more importantly for this test, that s1/s2 were not modified.
-    expect(s1.toString()).toBe('{ t { a } }');
-    expect(s2.toString()).toBe('{ t { b } }');
-  });
-
-  it('it automaticaly clones frozen fragment selections when merging to another one', () => {
-    // Note: needlessly complex queries, but we're just ensuring the cloning story works when fragments are involved
-    const s1 = parseOperation(schema, `{ ... on Query { t { ... on T { a } } } }`).selectionSet.freeze();
-    const s2 = parseOperation(schema, `{ ... on Query { t { ... on T { b } } } }`).selectionSet.freeze();
-    const s = new SelectionSet(schema.schemaDefinition.rootType('query')!);
-
-    s.mergeIn(s1);
-    s.mergeIn(s2);
-
-    expect(s.toString()).toBe('{ ... on Query { t { ... on T { a b } } } }');
-
-    expect(s1.toString()).toBe('{ ... on Query { t { ... on T { a } } } }');
-    expect(s2.toString()).toBe('{ ... on Query { t { ... on T { b } } } }');
-  });
-
-  it('it automaticaly clones frozen field selections when adding to another one', () => {
-    const s1 = parseOperation(schema, `{ t { a } }`).selectionSet.freeze();
-    const s2 = parseOperation(schema, `{ t { b } }`).selectionSet.freeze();
-    const s = new SelectionSet(schema.schemaDefinition.rootType('query')!);
-    const tSelection = new FieldSelection(new Field(tField));
-    s.add(tSelection);
-
-    // Note that this test both checks the auto-cloning for the `add` method, but
-    // also shows that freezing dose apply deeply (since we freeze the whole `s1`/`s2`
-    // but only add some sub-selection of it).
-    tSelection.selectionSet!.add(s1.selections()[0].selectionSet!.selections()[0]);
-    tSelection.selectionSet!.add(s2.selections()[0].selectionSet!.selections()[0]);
-
-    // We check S is what we expect...
-    expect(s.toString()).toBe('{ t { a b } }');
-
-    // ... but more importantly for this test, that s1/s2 were not modified.
-    expect(s1.toString()).toBe('{ t { a } }');
-    expect(s2.toString()).toBe('{ t { b } }');
-  });
-});
-
 describe('validations', () => {
   test.each([
     { directive: '@defer', rootKind: 'mutation' },
@@ -517,3 +399,150 @@ describe('empty branches removal', () => {
     )).toBe('{ u }');
   });
 });
+
+describe('basic operations', () => {
+  const schema = parseSchema(`
+    type Query {
+      t: T
+      i: I
+    }
+
+    type T {
+      v1: Int
+      v2: String
+      v3: I
+    }
+
+    interface I {
+      x: Int
+      y: Int
+    }
+
+    type A implements I {
+      x: Int
+      y: Int
+      a1: String
+      a2: String
+    }
+
+    type B implements I {
+      x: Int
+      y: Int
+      b1: Int
+      b2: T
+    }
+  `);
+
+  const operation = parseOperation(schema, `
+    {
+      t {
+        v1
+        v3 {
+          x
+        }
+      }
+      i {
+        ... on A {
+          a1
+          a2
+        }
+        ... on B {
+          y
+          b2 {
+           v2
+          }
+        }
+      }
+    }
+  `);
+
+  test('forEachElement', () => {
+    // We collect a pair of (parent type, field-or-fragment).
+    const actual: [string, string][] = [];
+    operation.selectionSet.forEachElement((elt) => actual.push([elt.parentType.name, elt.toString()]));
+    expect(actual).toStrictEqual([
+      ['Query', 't'],
+      ['T', 'v1'],
+      ['T', 'v3'],
+      ['I', 'x'],
+      ['Query', 'i'],
+      ['I', '... on A'],
+      ['A', 'a1'],
+      ['A', 'a2'],
+      ['I', '... on B'],
+      ['B', 'y'],
+      ['B', 'b2'],
+      ['T', 'v2'],
+    ]);
+  })
+});
+
+
+describe('MutableSelectionSet', () => {
+  test('memoizer', () => {
+    const schema = parseSchema(`
+      type Query {
+        t: T
+      }
+
+      type T {
+        v1: Int
+        v2: String
+        v3: Int
+        v4: Int
+      }
+    `);
+
+    type Value = {
+      count: number
+    };
+
+    let calls = 0;
+    const sets: string[] = [];
+
+    const queryType = schema.schemaDefinition.rootType('query')!;
+    const ss = MutableSelectionSet.emptyWithMemoized<Value>(
+      queryType,
+      (s) => {
+        sets.push(s.toString());
+        return { count: ++calls };
+      }
+    );
+
+    expect(ss.memoized().count).toBe(1);
+    // Calling a 2nd time with no change to make sure we're not re-generating the value.
+    expect(ss.memoized().count).toBe(1);
+
+    ss.updates().add(parseOperation(schema, `{ t { v1 } }`).selectionSet);
+
+    expect(ss.memoized().count).toBe(2);
+    expect(sets).toStrictEqual(['{}', '{ t { v1 } }']);
+
+    ss.updates().add(parseOperation(schema, `{ t { v3 } }`).selectionSet);
+
+    expect(ss.memoized().count).toBe(3);
+    expect(sets).toStrictEqual(['{}', '{ t { v1 } }', '{ t { v1 v3 } }']);
+
+    // Still making sure we don't re-compute without updates.
+    expect(ss.memoized().count).toBe(3);
+
+    const cloned = ss.clone();
+    expect(cloned.memoized().count).toBe(3);
+
+    cloned.updates().add(parseOperation(schema, `{ t { v2 } }`).selectionSet);
+
+    // The value of `ss` should not have be recomputed, so it should still be 3.
+    expect(ss.memoized().count).toBe(3);
+    // But that of the clone should have changed.
+    expect(cloned.memoized().count).toBe(4);
+    expect(sets).toStrictEqual(['{}', '{ t { v1 } }', '{ t { v1 v3 } }', '{ t { v1 v3 v2 } }']);
+
+    // And here we make sure that if we update the fist selection, we don't have v3 in the set received
+    ss.updates().add(parseOperation(schema, `{ t { v4 } }`).selectionSet);
+    // Here, only `ss` memoized value has been recomputed. But since both increment the same `calls` variable,
+    // the total count should be 5 (even if the previous count for `ss` was only 3).
+    expect(ss.memoized().count).toBe(5);
+    expect(cloned.memoized().count).toBe(4);
+    expect(sets).toStrictEqual(['{}', '{ t { v1 } }', '{ t { v1 v3 } }', '{ t { v1 v3 v2 } }', '{ t { v1 v3 v4 } }']);
+  });
+});