From 04e9f35bc3fb446283f62a99fd6d689237a16d03 Mon Sep 17 00:00:00 2001 From: Steven McCanne Date: Thu, 2 Apr 2026 19:24:03 -0700 Subject: [PATCH 1/2] rework fusion framework based on immutable named types This commit reworks downcast, upcast, and fuser to presume that named types are immutable. The changes for immutability are forthcoming but the fusion code now assumes this invariant. This new design involves fused types carrying the named types instead of stripping them and deferring them to the fusion subtypes. This is key to allow the fusion runtime to manipulate named types. When support for recursive types is added, the recursive type will remain in the fusion and the concrete types will be fused below. With this approach, a fused recursive type will rarely need to be unfused. This commit also tightens up the algorithms for upcast/downcast so they should now be generally more reliable and can serve as a model for implementing their vam counterparts. --- runtime/sam/expr/agg/fuser.go | 9 + runtime/sam/expr/function/defuse.go | 10 +- runtime/sam/expr/function/downcast.go | 219 ++++++++++++++------- runtime/sam/expr/function/function.go | 2 +- runtime/sam/expr/function/fusion.go | 2 +- runtime/sam/expr/function/under.go | 21 +- runtime/sam/expr/function/upcast.go | 27 ++- runtime/ztests/expr/function/defuse.yaml | 32 ++- runtime/ztests/expr/function/downcast.yaml | 72 +++++++ runtime/ztests/expr/function/upcast.yaml | 6 +- runtime/ztests/expr/fuser.yaml | 2 +- runtime/ztests/op/blend.yaml | 2 +- runtime/ztests/op/fuse.yaml | 2 +- 13 files changed, 296 insertions(+), 110 deletions(-) create mode 100644 runtime/ztests/expr/function/downcast.yaml diff --git a/runtime/sam/expr/agg/fuser.go b/runtime/sam/expr/agg/fuser.go index 46f63eb44..cf60a28e5 100644 --- a/runtime/sam/expr/agg/fuser.go +++ b/runtime/sam/expr/agg/fuser.go @@ -121,6 +121,15 @@ func (f *Fuser) fuse(a, b super.Type) super.Type { } case *super.TypeNamed: if b, ok := b.(*super.TypeNamed); ok && a.Name 
== b.Name { + if a.Type != b.Type { + // The fusion algorithm does not handle named types that change. + // We will soon maked such types immutable, but for now we just + // return type error({}) to avoid any tests that might do this. + recType := f.sctx.MustLookupTypeRecord([]super.Field{ + super.NewField(a.Name, a.Type), + }) + return f.sctx.LookupTypeError(recType) + } named, err := f.sctx.LookupTypeNamed(a.Name, f.fuse(a.Type, b.Type)) if err != nil { panic(err) diff --git a/runtime/sam/expr/function/defuse.go b/runtime/sam/expr/function/defuse.go index 72f005356..31808c621 100644 --- a/runtime/sam/expr/function/defuse.go +++ b/runtime/sam/expr/function/defuse.go @@ -16,7 +16,7 @@ type defuse struct { func NewDefuse(sctx *super.Context) *defuse { return &defuse{ sctx: sctx, - downcast: &downcast{sctx: sctx}, + downcast: &downcast{sctx: sctx, name: "defuse"}, has: make(map[super.Type]bool), } } @@ -95,11 +95,11 @@ func (d *defuse) eval(in super.Value) super.Value { case *super.TypeUnion: return d.eval(in.DeunionIntoNameds()) case *super.TypeFusion: - _, subType := typ.Deref(d.sctx, in.Bytes()) - if out, ok := d.downcast.Cast(in, subType); ok { - return out + out, errVal := d.downcast.defuse(typ, in.Bytes()) + if errVal != nil { + return *errVal } - return d.sctx.WrapError("cannot defuse super value", in) + return out default: // primitives, named types, enums // BTW, named types are a barrier to defuse. 
diff --git a/runtime/sam/expr/function/downcast.go b/runtime/sam/expr/function/downcast.go index 04697cd6c..c669e300a 100644 --- a/runtime/sam/expr/function/downcast.go +++ b/runtime/sam/expr/function/downcast.go @@ -1,6 +1,8 @@ package function import ( + "slices" + "github.com/brimdata/super" "github.com/brimdata/super/scode" "github.com/brimdata/super/sup" @@ -8,10 +10,11 @@ import ( type downcast struct { sctx *super.Context + name string } -func NewDowncast(sctx *super.Context) Caster { - return &downcast{sctx} +func NewDowncast(sctx *super.Context, name string) Caster { + return &downcast{sctx, name} } func (d *downcast) Call(args []super.Value) super.Value { @@ -23,164 +26,238 @@ func (d *downcast) Call(args []super.Value) super.Value { if err != nil { panic(err) } - val, ok := d.Cast(from, typ) - if !ok { - return d.sctx.WrapError("downcast: value not a supertype of "+sup.FormatType(typ), from) + val, errVal := d.downcast(from.Type(), from.Bytes(), typ) + if errVal != nil { + return *errVal } return val } func (d *downcast) Cast(from super.Value, to super.Type) (super.Value, bool) { - var b scode.Builder - if ok := d.downcast(&b, from.Type(), from.Bytes(), to); ok { - return super.NewValue(to, b.Bytes().Body()), true - } - return super.Value{}, false + val, errVal := d.downcast(from.Type(), from.Bytes(), to) + return val, errVal == nil } -func (d *downcast) downcast(b *scode.Builder, typ super.Type, bytes scode.Bytes, to super.Type) bool { - typ, bytes = deunion(typ, bytes) - if superType, ok := typ.(*super.TypeFusion); ok { - superBytes, _ := superType.Deref(d.sctx, bytes) - return d.downcast(b, superType.Type, superBytes, to) +func (d *downcast) downcast(typ super.Type, bytes scode.Bytes, to super.Type) (super.Value, *super.Value) { + if _, ok := to.(*super.TypeUnion); !ok { + if fusionType, ok := typ.(*super.TypeFusion); ok { + superBytes, subtype := fusionType.Deref(d.sctx, bytes) + return d.downcast(fusionType.Type, superBytes, subtype) + } } - typ = 
super.TypeUnder(typ) + typ, bytes = deunion(typ, bytes) switch to := to.(type) { case *super.TypeRecord: - return d.toRecord(b, typ, bytes, to) + return d.toRecord(typ, bytes, to) case *super.TypeArray: - return d.toArray(b, typ, bytes, to) + return d.toArray(typ, bytes, to) case *super.TypeSet: - return d.toSet(b, typ, bytes, to) + return d.toSet(typ, bytes, to) case *super.TypeMap: - return d.toMap(b, typ, bytes, to) + return d.toMap(typ, bytes, to) case *super.TypeUnion: - return d.toUnion(b, typ, bytes, to) + return d.toUnion(typ, bytes, to) case *super.TypeError: - return d.toError(b, typ, bytes, to) + return d.toError(typ, bytes, to) case *super.TypeNamed: - return d.downcast(b, typ, bytes, to.Type) + return d.toNamed(typ, bytes, to) case *super.TypeFusion: // Can't downcast to a super type - return false + return super.Value{}, d.sctx.WrapError("downcast: cannot downcast to a fusion type", super.NewValue(typ, bytes)).Ptr() default: if typ == to { - b.Append(bytes) - return true + return super.NewValue(typ, bytes), nil + } else { + typ, bytes := deunion(typ, bytes) + if typ == to { + return super.NewValue(typ, bytes), nil + } } - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } } -func (d *downcast) toRecord(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeRecord) bool { +func (d *downcast) defuse(fusionType *super.TypeFusion, bytes scode.Bytes) (super.Value, *super.Value) { + superBytes, subtype := fusionType.Deref(d.sctx, bytes) + return d.downcast(fusionType.Type, superBytes, subtype) +} + +func (d *downcast) toRecord(typ super.Type, bytes scode.Bytes, to *super.TypeRecord) (super.Value, *super.Value) { fromType, ok := typ.(*super.TypeRecord) if !ok { - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } var nones []int var optOff int + b := scode.NewBuilder() b.BeginContainer() - for _, toField := range to.Fields { // ranging through to fields and lookup up from + for k, toField := range to.Fields { 
// ranging through to fields and lookup up from elemType, elemBytes, none, ok := derefWithNoneAndOk(fromType, bytes, toField.Name) if !ok { // The super value must have all the fields of the subtype cast. // It's missing a field, so fail. - return false + return super.Value{}, d.errSubtype(typ, bytes, to) } if none { if !toField.Opt { // A none can't go in a non-optional field. - return false + return super.Value{}, d.errSubtype(typ, bytes, to) } nones = append(nones, optOff) optOff++ + } else if toField.Opt && !fromType.Fields[k].Opt { + return super.Value{}, d.errSubtype(typ, bytes, to) } else { // We have the value and the to field. Downcast recursively. - if ok := d.downcast(b, elemType, elemBytes, toField.Type); !ok { - return false + val, errVal := d.downcast(elemType, elemBytes, toField.Type) + if errVal != nil { + return super.Value{}, errVal } if toField.Opt { optOff++ } + b.Append(val.Bytes()) } } b.EndContainerWithNones(to.Opts, nones) - return true + return super.NewValue(to, b.Bytes().Body()), nil } -func (d *downcast) toArray(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeArray) bool { +func (d *downcast) toArray(typ super.Type, bytes scode.Bytes, to *super.TypeArray) (super.Value, *super.Value) { if arrayType, ok := typ.(*super.TypeArray); ok { - return d.toContainer(b, arrayType.Type, bytes, to.Type) + return d.toContainer(arrayType.Type, bytes, to, to.Type) } - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } -func (d *downcast) toSet(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeSet) bool { +func (d *downcast) toSet(typ super.Type, bytes scode.Bytes, to *super.TypeSet) (super.Value, *super.Value) { if setType, ok := typ.(*super.TypeSet); ok { // XXX normalize set contents? 
can reach into body here blah - return d.toContainer(b, setType.Type, bytes, to.Type) + return d.toContainer(setType.Type, bytes, to, to.Type) } - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } -func (d *downcast) toContainer(b *scode.Builder, typ super.Type, bytes scode.Bytes, to super.Type) bool { +func (d *downcast) toContainer(elemType super.Type, bytes scode.Bytes, to super.Type, toElem super.Type) (super.Value, *super.Value) { + b := scode.NewBuilder() b.BeginContainer() for it := bytes.Iter(); !it.Done(); { - if ok := d.downcast(b, typ, it.Next(), to); !ok { - return false + val, errVal := d.downcast(elemType, it.Next(), toElem) + if errVal != nil { + return super.Value{}, errVal } + b.Append(val.Bytes()) } b.EndContainer() - return true + return super.NewValue(to, b.Bytes().Body()), nil } -func (d *downcast) toMap(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeMap) bool { +func (d *downcast) toMap(typ super.Type, bytes scode.Bytes, to *super.TypeMap) (super.Value, *super.Value) { mapType, ok := typ.(*super.TypeMap) if !ok { - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } + b := scode.NewBuilder() b.BeginContainer() for it := bytes.Iter(); !it.Done(); { - if ok := d.downcast(b, mapType.KeyType, it.Next(), to.KeyType); !ok { - return false + key, errVal := d.downcast(mapType.KeyType, it.Next(), to.KeyType) + if errVal != nil { + return super.Value{}, errVal } - if ok := d.downcast(b, mapType.ValType, it.Next(), to.ValType); !ok { - return false + b.Append(key.Bytes()) + val, errVal := d.downcast(mapType.ValType, it.Next(), to.ValType) + if errVal != nil { + return super.Value{}, errVal } + b.Append(val.Bytes()) } b.EndContainer() - return true + return super.NewValue(to, b.Bytes().Body()), nil } -func (d *downcast) toUnion(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeUnion) bool { - tag := d.subTypeOf(typ, bytes, to.Types) +func (d *downcast) toUnion(typ super.Type, bytes 
scode.Bytes, to *super.TypeUnion) (super.Value, *super.Value) { + if typ == to { + return super.NewValue(typ, bytes), nil + } + tag, typ, bytes := d.subTypeOf(typ, bytes, to.Types) if tag < 0 { - return false + if _, ok := typ.(*super.TypeUnion); ok { + typ, bytes = deunion(typ, bytes) + return d.downcast(typ, bytes, to) + } + return super.Value{}, d.errSubtype(typ, bytes, to) } - super.BeginUnion(b, tag) - if ok := d.downcast(b, typ, bytes, to.Types[tag]); !ok { - return false + val, errVal := d.downcast(typ, bytes, to.Types[tag]) + if errVal != nil { + return super.Value{}, errVal } + b := scode.NewBuilder() + super.BeginUnion(b, tag) + b.Append(val.Bytes()) b.EndContainer() - return true + return super.NewValue(to, b.Bytes().Body()), nil } -func (d *downcast) toError(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeError) bool { +// subTypeOf finds the tag in the union array types that this value should be +// downcast to. If the child value is a fusion value, then the type must match +// the subtype of the fusion value. Otherwise, the child wasn't fused, and by +// definition of a fusion type, one of the union types must exactly match the +// child type. 
+func (d *downcast) subTypeOf(typ super.Type, bytes scode.Bytes, types []super.Type) (int, super.Type, []byte) { + if fusionType, ok := typ.(*super.TypeFusion); ok { + superBytes, subtype := fusionType.Deref(d.sctx, bytes) + return slices.Index(types, subtype), fusionType.Type, superBytes + } + return slices.Index(types, typ), typ, bytes +} + +func (d *downcast) toError(typ super.Type, bytes scode.Bytes, to *super.TypeError) (super.Value, *super.Value) { if errorType, ok := typ.(*super.TypeError); ok { - return d.downcast(b, errorType.Type, bytes, to.Type) + body, errVal := d.downcast(errorType.Type, bytes, to.Type) + if errVal != nil { + return super.Value{}, errVal + } + return super.NewValue(to, body.Bytes()), nil } - return false + return super.Value{}, d.errMismatch(typ, bytes, to) } -func (d *downcast) subTypeOf(typ super.Type, bytes scode.Bytes, types []super.Type) int { - // XXX TBD we should make a subtype() function that returns true if a type is - // a subtype of another and use that here and expose it to the language. - var dummy scode.Builder - for k, t := range types { - if ok := d.downcast(&dummy, typ, bytes, t); ok { - return k +func (d *downcast) toNamed(typ super.Type, bytes scode.Bytes, to *super.TypeNamed) (super.Value, *super.Value) { + if unionType, ok := typ.(*super.TypeUnion); ok { + typ, bytes = deunion(typ, bytes) + // If we are casting a union type to a named, we need to look through the + // union for the named type in question since type fusion fuses named + // types by name. Then when we find the name, we need to form the subtype + // from the union options present. 
+ for _, t := range unionType.Types { + if named, ok := t.(*super.TypeNamed); ok && named.Name == to.Name { + typ, bytes = deunion(typ, bytes) + return super.NewValue(to, bytes), nil + } } - dummy.Reset() + return super.Value{}, d.errMismatch(typ, bytes, to) } - return -1 + if fromType, ok := typ.(*super.TypeNamed); ok { + if fromType.Name != to.Name { + return super.Value{}, d.errMismatch(typ, bytes, to) + } + val, errVal := d.downcast(fromType.Type, bytes, to.Type) + if errVal != nil { + return super.Value{}, errVal + } + return super.NewValue(to, val.Bytes()), errVal + } + val, errVal := d.downcast(typ, bytes, to.Type) + if errVal != nil { + return super.Value{}, errVal + } + return super.NewValue(to, val.Bytes()), errVal +} + +func (d *downcast) errMismatch(typ super.Type, bytes []byte, to super.Type) *super.Value { + return d.sctx.WrapError("downcast: type mismatch to "+sup.FormatType(to), super.NewValue(typ, bytes)).Ptr() +} + +func (d *downcast) errSubtype(typ super.Type, bytes []byte, to super.Type) *super.Value { + return d.sctx.WrapError("downcast: invalid subtype "+sup.FormatType(to), super.NewValue(typ, bytes)).Ptr() } diff --git a/runtime/sam/expr/function/function.go b/runtime/sam/expr/function/function.go index b207c8357..dddc1c801 100644 --- a/runtime/sam/expr/function/function.go +++ b/runtime/sam/expr/function/function.go @@ -58,7 +58,7 @@ func New(sctx *super.Context, name string, narg int) (expr.Function, error) { case "downcast": argmin = 2 argmax = 2 - f = &downcast{sctx} + f = &downcast{sctx: sctx, name: "downcast"} case "error": f = &Error{sctx: sctx} case "fields": diff --git a/runtime/sam/expr/function/fusion.go b/runtime/sam/expr/function/fusion.go index 8c16500eb..b79830cf5 100644 --- a/runtime/sam/expr/function/fusion.go +++ b/runtime/sam/expr/function/fusion.go @@ -14,7 +14,7 @@ type fusion struct { func newFusion(sctx *super.Context) *fusion { return &fusion{ sctx: sctx, - downcast: NewDowncast(sctx), + downcast: NewDowncast(sctx, 
"fusion"), } } diff --git a/runtime/sam/expr/function/under.go b/runtime/sam/expr/function/under.go index 5c760d527..89474566f 100644 --- a/runtime/sam/expr/function/under.go +++ b/runtime/sam/expr/function/under.go @@ -2,18 +2,17 @@ package function import ( "github.com/brimdata/super" - "github.com/brimdata/super/sup" ) type Under struct { sctx *super.Context - downcast Caster + downcast *downcast } func NewUnder(sctx *super.Context) *Under { return &Under{ sctx: sctx, - downcast: NewDowncast(sctx), + downcast: &downcast{sctx, "under"}, } } @@ -25,19 +24,11 @@ func (u *Under) Call(args []super.Value) super.Value { case *super.TypeError: return super.NewValue(typ.Type, val.Bytes()) case *super.TypeFusion: - it := val.Bytes().Iter() - bytes := it.Next() - subType, err := u.sctx.LookupByValue(it.Next()) - if err != nil { - panic(err) - } - out, ok := u.downcast.Cast(super.NewValue(typ.Type, bytes), subType) - if !ok { - // The runtime should never allow creation of a super value that - // doesn't follow the subtype invariant. 
- panic(sup.FormatValue(val)) + val, errVal := u.downcast.defuse(typ, val.Bytes()) + if errVal != nil { + return *errVal } - return out + return val case *super.TypeUnion: return super.NewValue(typ.Untag(val.Bytes())) case *super.TypeOfType: diff --git a/runtime/sam/expr/function/upcast.go b/runtime/sam/expr/function/upcast.go index 890c1b15d..d8e6afddd 100644 --- a/runtime/sam/expr/function/upcast.go +++ b/runtime/sam/expr/function/upcast.go @@ -41,8 +41,6 @@ func (u *Upcast) Cast(from super.Value, to super.Type) (super.Value, bool) { } func (u *Upcast) build(b *scode.Builder, typ super.Type, bytes scode.Bytes, to super.Type) bool { - typOrig := typ - typ = super.TypeUnder(typ) switch to := to.(type) { case *super.TypeRecord: return u.toRecord(b, typ, bytes, to) @@ -57,9 +55,9 @@ func (u *Upcast) build(b *scode.Builder, typ super.Type, bytes scode.Bytes, to s case *super.TypeError: return u.toError(b, typ, bytes, to) case *super.TypeNamed: - return u.build(b, typ, bytes, to.Type) + return u.toNamed(b, typ, bytes, to) case *super.TypeFusion: - return u.toFusion(b, typOrig, bytes, to) + return u.toFusion(b, typ, bytes, to) default: if typ == to { b.Append(bytes) @@ -140,11 +138,11 @@ func (u *Upcast) toSet(b *scode.Builder, typ super.Type, bytes scode.Bytes, to * return false } -func (u *Upcast) toContainer(b *scode.Builder, typ super.Type, bytes scode.Bytes, to super.Type) bool { +func (u *Upcast) toContainer(b *scode.Builder, elemType super.Type, bytes scode.Bytes, toElemType super.Type) bool { b.BeginContainer() for it := bytes.Iter(); !it.Done(); { - typ, bytes := deunion(typ, it.Next()) - if ok := u.build(b, typ, bytes, to); !ok { + elemType, bytes := deunion(elemType, it.Next()) + if ok := u.build(b, elemType, bytes, toElemType); !ok { return false } } @@ -205,6 +203,14 @@ func deunion(typ super.Type, bytes scode.Bytes) (super.Type, scode.Bytes) { } func upcastUnionTag(types []super.Type, out super.Type) int { + if named, ok := out.(*super.TypeNamed); ok { + 
return slices.IndexFunc(types, func(t super.Type) bool { + if t, ok := t.(*super.TypeNamed); ok && named.Name == t.Name { + return true + } + return false + }) + } k := out.Kind() if k == super.PrimitiveKind { id := out.ID() @@ -219,3 +225,10 @@ func (u *Upcast) toError(b *scode.Builder, typ super.Type, bytes scode.Bytes, to } return false } + +func (u *Upcast) toNamed(b *scode.Builder, typ super.Type, bytes scode.Bytes, to *super.TypeNamed) bool { + if named, ok := typ.(*super.TypeNamed); ok { + return u.build(b, named.Type, bytes, to.Type) + } + return false +} diff --git a/runtime/ztests/expr/function/defuse.yaml b/runtime/ztests/expr/function/defuse.yaml index d7a381797..d3c62069c 100644 --- a/runtime/ztests/expr/function/defuse.yaml +++ b/runtime/ztests/expr/function/defuse.yaml @@ -20,7 +20,6 @@ input: &input | output: *input --- -skip: this will be turned on in the next PR that reworks downcast spq: fuse | defuse(this) @@ -41,7 +40,22 @@ input: &input | |[|[1::=s1]|::=s2]|::=s3 |{1::=m1:2::=m2}|::=m3 "a"::(en1=enum(a,b)) - 1::=n1::(n2=n1|(n3=string)) + 1::=u1::(u2=u1|(u3=string)) + error(1::=er1)::=er2 + +output: *input + +--- + +spq: fuse | defuse(this) + +input: &input | + 1::=p1 + {a:{b:1::=r1}::=r2}::=r3 + [[1::=a1]::=a2]::=a3 + |[|[1::=s1]|::=s2]|::=s3 + |{1::=m1:2::=m2}|::=m3 + "a"::(en1=enum(a,b)) error(1::=er1)::=er2 output: *input @@ -51,9 +65,19 @@ output: *input spq: defuse(this) input: | - {a:fusion(1::(int64|string),)} - {a?:_::int64,b:1,c:fusion(2,)} + fusion({a?:1,b?:_::int64,c?:_::int64},<{a:int64}>) + fusion({a?:_::int64,b?:1,c?:2},<{b:int64,c:int64}>) output: | {a:1} {b:1,c:2} + +--- + +spq: fuse | defuse(this) + +input: &input | + [1::(int64|bool|string),"foo"::(int64|bool|string)] + +output: | + [1,"foo"]::[int64|bool|string] diff --git a/runtime/ztests/expr/function/downcast.yaml b/runtime/ztests/expr/function/downcast.yaml new file mode 100644 index 000000000..56643a5fd --- /dev/null +++ b/runtime/ztests/expr/function/downcast.yaml @@ 
-0,0 +1,72 @@ +# Test downcast on records. +spq: downcast(this[0], this[1]) + +input: | + [{x:1},<{}>] + [{x?:1},<{x:int64}>] + [{x:1::(int64|string)},<{x:int64}>] + [{x:1,y:"foo"},<{y:string}>] + [{x:1::(int64|bool|string)},<{x:int64|string}>] + // error cases + [{x?:_::int64},<{x:int64}>] + [{x:1},<{x?:int64}>] + +output: | + {} + {x:1} + {x:1} + {y:"foo"} + {x:1::(int64|string)} + error({message:"downcast: invalid subtype {x:int64}",on:{x?:_::int64}}) + error({message:"downcast: invalid subtype {x?:int64}",on:{x:1}}) + +--- + +# Test downcast on unions. +spq: downcast(this[0], this[1]) + +input: | + [1::(int64|bool|string),<(int64|string)>] + [1,<(int64|string)>] + [1::(int64|string),<(int64|bool|string)>] + // error cases + [{x:1::(int64|string)}::({x:int64|string}|{y:int64|string}),<{x:int64}|{y:string}>] + [{x:1::(int64|string)}::({x:int64|string}|{y:int64|string}|{z:int64|string}),<{x:int64}|{y:string}>] + +output: | + 1::(int64|string) + 1::(int64|string) + 1::(int64|bool|string) + error({message:"downcast: invalid subtype {x:int64}|{y:string}",on:{x:1::(int64|string)}}) + error({message:"downcast: invalid subtype {x:int64}|{y:string}",on:{x:1::(int64|string)}}) + +--- + +# Test downcast on arrays. +spq: downcast(this[0], this[1]) + +input: | + [[1::(int64|bool|string),"foo"::(int64|bool|string)],<[int64|bool|string]>] + [[{x:1::(int64|string)},{y:"foo"::(int64|string)}],<[{x:int64}|{y:string}]>] + +output: | + [1,"foo"]::[int64|bool|string] + error({message:"downcast: invalid subtype {x:int64}|{y:string}",on:{x:1::(int64|string)}}) +--- + +# This should seemingly fail because int64|bool|string is not a subtype of +# int64|string, but this condition is difficult to detect in general and +# allowing such values doesn't hurt fuse/defuse as fuse would never create +# such a value. To see why this is difficult, consider the second case, +# which is created by fuse. 
Because fuse flattens unions but the data model +# allows for unnested values, this can arise, e.g., fusing the values +# 1 with 2::(int64|(foo=(int64|string))). +spq: downcast(this[0], this[1]) + +input: | + [1::(int64|string),<(int64|bool|string)>] + [2::(int64|string),<(int64|(foo=(int64|string)))>] + +output: | + 1::(int64|bool|string) + 2::(int64|(foo=int64|string)) diff --git a/runtime/ztests/expr/function/upcast.yaml b/runtime/ztests/expr/function/upcast.yaml index 2cafc3351..5a46cd376 100644 --- a/runtime/ztests/expr/function/upcast.yaml +++ b/runtime/ztests/expr/function/upcast.yaml @@ -11,9 +11,9 @@ input: | [[[1::=n1]::=n2]::=n3,] [|[|[1::=n1]|::=n2]|::=n3,] [|{1::=n1:2::=n2}|::=n3,] - [1::=n1::(n2=n1|(n3=string)),] + [1::(n4=(n5=int64)),] ["a"::n1=enum(a,b),] - [1::=n1,] + [1::=n1,] output: | error({message:"upcast: value not a subtype of [int8|string]",on:[1,"a"]}) @@ -25,7 +25,7 @@ output: | |{1::=n5:2::=n6}|::=n4 1::=n5::(n4=n5|(n6=string)) "a"::(n2=enum(a,b)) - fusion(1::(int64|string),) + fusion(1::=n1::(n1|string),) --- diff --git a/runtime/ztests/expr/fuser.yaml b/runtime/ztests/expr/fuser.yaml index e50dacfa9..886727ac9 100644 --- a/runtime/ztests/expr/fuser.yaml +++ b/runtime/ztests/expr/fuser.yaml @@ -12,6 +12,6 @@ spq: fuse | defuse(this) input: &input | "foo"::(int64|string) "foo"::=named - "foo"::(named=int64|string) + "foo"::(named2=int64|string) output: *input diff --git a/runtime/ztests/op/blend.yaml b/runtime/ztests/op/blend.yaml index 38bed0e2f..0d74cba10 100644 --- a/runtime/ztests/op/blend.yaml +++ b/runtime/ztests/op/blend.yaml @@ -141,6 +141,6 @@ output: | [1::=a1]::=a2::((p1=int64)|(r2={a:r1=int64})|a2|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))) |[1::=s1]|::=s2::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|s2|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))) 
|{1::=m1:2::=m2}|::=m3::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|m3|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))) - 1::=p1::(p1|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))) + 1::=u1::(u2=u1|(u3=string))::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|u2|(en1=enum(a,b))|(er2=error(er1=int64))) "a"::(en1=enum(a,b))::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|en1|(er2=error(er1=int64))) error(1::=er1)::=er2::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|er2) diff --git a/runtime/ztests/op/fuse.yaml b/runtime/ztests/op/fuse.yaml index a39a4707a..8ce2cfb0f 100644 --- a/runtime/ztests/op/fuse.yaml +++ b/runtime/ztests/op/fuse.yaml @@ -141,6 +141,6 @@ output: | fusion([1::=a1]::=a2::((p1=int64)|(r2={a:r1=int64})|a2|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))),) fusion(|[1::=s1]|::=s2::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|s2|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))),) fusion(|{1::=m1:2::=m2}|::=m3::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|m3|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))),) - fusion(1::=p1::(p1|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|(er2=error(er1=int64))),) + fusion(1::=u1::(u2=u1|(u3=string))::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|u2|(en1=enum(a,b))|(er2=error(er1=int64))),) 
fusion("a"::(en1=enum(a,b))::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|en1|(er2=error(er1=int64))),) fusion(error(1::=er1)::=er2::((p1=int64)|(r2={a:r1=int64})|(a2=[a1=int64])|(s2=|[s1=int64]|)|(m3=|{m1=int64:m2=int64}|)|(u2=(u1=int64)|(u3=string))|(en1=enum(a,b))|er2),) From d49c05d1e5a39c6f8004470bbeaa78b996e2406c Mon Sep 17 00:00:00 2001 From: Steven McCanne Date: Thu, 9 Apr 2026 10:22:29 -0700 Subject: [PATCH 2/2] address PR feedback --- runtime/sam/expr/agg/fuser.go | 2 +- runtime/sam/expr/function/upcast.go | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/runtime/sam/expr/agg/fuser.go b/runtime/sam/expr/agg/fuser.go index cf60a28e5..ae030e7f5 100644 --- a/runtime/sam/expr/agg/fuser.go +++ b/runtime/sam/expr/agg/fuser.go @@ -123,7 +123,7 @@ func (f *Fuser) fuse(a, b super.Type) super.Type { if b, ok := b.(*super.TypeNamed); ok && a.Name == b.Name { if a.Type != b.Type { // The fusion algorithm does not handle named types that change. - // We will soon maked such types immutable, but for now we just + // We will soon make such types immutable, but for now we just // return type error({}) to avoid any tests that might do this. recType := f.sctx.MustLookupTypeRecord([]super.Field{ super.NewField(a.Name, a.Type), diff --git a/runtime/sam/expr/function/upcast.go b/runtime/sam/expr/function/upcast.go index d8e6afddd..3b4a33a08 100644 --- a/runtime/sam/expr/function/upcast.go +++ b/runtime/sam/expr/function/upcast.go @@ -205,10 +205,8 @@ func deunion(typ super.Type, bytes scode.Bytes) (super.Type, scode.Bytes) { func upcastUnionTag(types []super.Type, out super.Type) int { if named, ok := out.(*super.TypeNamed); ok { return slices.IndexFunc(types, func(t super.Type) bool { - if t, ok := t.(*super.TypeNamed); ok && named.Name == t.Name { - return true - } - return false + typ, ok := t.(*super.TypeNamed) + return ok && named.Name == typ.Name }) } k := out.Kind()