From 1c5ce382eed705af3a44ddcfdeb1e799dde9eed3 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Tue, 21 Jul 2015 15:42:25 +1000 Subject: [PATCH 01/10] Split Seq.groupBy for ValueType/RefType The StructBox makes code that contains "hard" tail calls, which means that performance suffers under the 64 bit JIT --- src/fsharp/FSharp.Core/seq.fs | 57 +++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index fc297daebc2..ca824e07493 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1444,32 +1444,45 @@ namespace Microsoft.FSharp.Collections checkNonNull "source" source mkSeq (fun () -> source.GetEnumerator()) - - [] - let groupBy keyf seq = + let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (seq:seq<'T>) = + let dict = Dictionary<_,ResizeArray<_>> comparer + + // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying + // for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the + // Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. + let minimumBucketSize = 4 + + // Build the groupings + seq |> iter (fun v -> + let key = keyf v + let mutable prev = Unchecked.defaultof<_> + match dict.TryGetValue (key, &prev) with + | true -> prev.Add v + | false -> + let prev = ResizeArray minimumBucketSize + dict.[key] <- prev + prev.Add v) + + // Trim the size of each result group, don't trim very small buckets, as excessive work, and garbage for + // minimal gain + dict |> iter (fun group -> if group.Value.Count > minimumBucketSize then group.Value.TrimExcess()) + + // Return the sequence-of-sequences. Don't reveal the + // internal collections: just reveal them as sequences + dict |> map (fun group -> (getKey group.Key, readonly group.Value)) - mkDelayedSeq (fun () -> - // Wrap a StructBox(_) around all keys in case the key type is itself a type using null as a representation - let dict = new Dictionary,ResizeArray<'T>>(StructBox<'Key>.Comparer) + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let groupByValueType (keyf:'T->'Key) (seq:seq<'T>) = seq |> groupByImpl HashIdentity.Structural<'Key> keyf id - // Build the groupings - seq |> iter (fun v -> - let key = StructBox (keyf v) - let ok,prev = dict.TryGetValue(key) - if ok then - prev.Add(v) - else - let prev = new ResizeArray<'T>(1) - dict.[key] <- prev - prev.Add(v)) + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let groupByRefType (keyf:'T->'Key) (seq:seq<'T>) = seq |> groupByImpl StructBox<'Key>.Comparer (fun t -> StructBox (keyf t)) (fun sb -> sb.Value) - // Trim the size of each result group. - dict |> iter (fun group -> group.Value.TrimExcess()) - - // Return the sequence-of-sequences. Don't reveal the - // internal collections: just reveal them as sequences - dict |> map (fun group -> (group.Key.Value, readonly group.Value))) + [] + let groupBy (keyf:'T->'Key) (seq:seq<'T>) = + if typeof<'T>.IsValueType + then mkDelayedSeq (fun () -> groupByValueType keyf seq) + else mkDelayedSeq (fun () -> groupByRefType keyf seq) [] let distinct source = From c06d8e6f6762e9f42dd9dad9b81daba415838aa4 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Tue, 21 Jul 2015 16:07:33 +1000 Subject: [PATCH 02/10] Split Seq.countBy for ValueType/RefType --- src/fsharp/FSharp.Core/seq.fs | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index ca824e07493..b217805bd62 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1444,7 +1444,6 @@ namespace Microsoft.FSharp.Collections checkNonNull "source" source mkSeq (fun () -> source.GetEnumerator()) - [] let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (seq:seq<'T>) = let dict = Dictionary<_,ResizeArray<_>> comparer @@ -1536,19 +1535,32 @@ namespace Microsoft.FSharp.Collections let inline compareDescending a b = compare b a sortWith compareDescending source - [] - let countBy keyf source = + let inline countByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (source:seq<'T>) = checkNonNull "source" source - mkDelayedSeq (fun () -> - let dict = new Dictionary,int>(StructBox<'Key>.Comparer) - // Build the groupings - source |> iter (fun v -> - let key = StructBox (keyf v ) - let mutable prev = Unchecked.defaultof<_> - if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1) + let dict = Dictionary comparer + + // Build the groupings + source |> iter (fun v -> + let key = keyf v + let mutable prev = Unchecked.defaultof<_> + if dict.TryGetValue(key, &prev) + then dict.[key] <- prev + 1 + else dict.[key] <- 1) + + dict |> map (fun group -> (getKey group.Key, group.Value)) + + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let countByValueType (keyf:'T->'Key) (seq:seq<'T>) = seq |> countByImpl HashIdentity.Structural<'Key> keyf id - dict |> map (fun group -> (group.Key.Value, group.Value))) + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let countByRefType (keyf:'T->'Key) (seq:seq<'T>) = seq |> countByImpl StructBox<'Key>.Comparer (fun t -> StructBox (keyf t)) (fun sb -> sb.Value) + + [] + let countBy (keyf:'T->'Key) (seq:seq<'T>) = + if typeof<'T>.IsValueType + then mkDelayedSeq (fun () -> countByValueType keyf seq) + else mkDelayedSeq (fun () -> countByRefType keyf seq) [] let inline sum (source: seq< (^a) >) : ^a = From 202e12e6313f2059fd307b290204113a269ddbe9 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 04:27:45 +1000 Subject: [PATCH 03/10] "Fixing" Reflection issues with Profile builds There must be some other way to check if a type is a Value Type? I doubt if it has been removed? --- src/fsharp/FSharp.Core/seq.fs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index b217805bd62..d218694e5f1 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1479,9 +1479,15 @@ namespace Microsoft.FSharp.Collections [] let groupBy (keyf:'T->'Key) (seq:seq<'T>) = - if typeof<'T>.IsValueType + checkNonNull "source" seq + +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType then mkDelayedSeq (fun () -> groupByValueType keyf seq) else mkDelayedSeq (fun () -> groupByRefType keyf seq) +#else + mkDelayedSeq (fun () -> groupByRefType keyf seq) +#endif [] let distinct source = @@ -1557,10 +1563,16 @@ namespace Microsoft.FSharp.Collections let countByRefType (keyf:'T->'Key) (seq:seq<'T>) = seq |> countByImpl StructBox<'Key>.Comparer (fun t -> StructBox (keyf t)) (fun sb -> sb.Value) [] - let countBy (keyf:'T->'Key) (seq:seq<'T>) = - if typeof<'T>.IsValueType - then mkDelayedSeq (fun () -> countByValueType keyf seq) - else mkDelayedSeq (fun () -> countByRefType keyf seq) + let countBy (keyf:'T->'Key) (source:seq<'T>) = + checkNonNull "source" source + +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then mkDelayedSeq (fun () -> countByValueType keyf source) + else mkDelayedSeq (fun () -> countByRefType keyf source) +#else + mkDelayedSeq (fun () -> countByRefType keyf source) +#endif [] let inline sum (source: seq< (^a) >) : ^a = From d80e6162abd76980abc4a3cfa260188402e2f352 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 04:43:54 +1000 Subject: [PATCH 04/10] Split List.countBy by RefType/ValueType --- src/fsharp/FSharp.Core/list.fs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index 541e5e31fc3..48713a9fca9 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -41,23 +41,38 @@ namespace Microsoft.FSharp.Collections [] let concat lists = Microsoft.FSharp.Primitives.Basics.List.concat lists - [] - let countBy projection (list:'T list) = - let dict = new Dictionary,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer) + let inline countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (list:'T list) = + let dict = Dictionary comparer let rec loop srcList = match srcList with | [] -> () | h::t -> - let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection h) + let key = projection h let mutable prev = 0 if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1 loop t loop list let mutable result = [] for group in dict do - result <- (group.Key.Value, group.Value) :: result + result <- (getKey group.Key, group.Value) :: result result |> rev + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let countByValueType (projection:'T->'Key) (list:'T list) = countByImpl HashIdentity.Structural<'Key> projection id list + + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let countByRefType (projection:'T->'Key) (list:'T list) = countByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection t)) (fun sb -> sb.Value) list + + [] + let countBy (projection:'T->'Key) (list:'T list) = +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then countByValueType projection list + else countByRefType projection list +#else + countByRefType projection source +#endif + [] let map f list = Microsoft.FSharp.Primitives.Basics.List.map f list From 02e6d424ea885785dd49f0e1beeb98519a142ba4 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 04:55:42 +1000 Subject: [PATCH 05/10] Split List.groupBy for ValueType/RefType --- src/fsharp/FSharp.Core/list.fs | 41 +++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index 48713a9fca9..531bea89b4c 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -449,31 +449,52 @@ namespace Microsoft.FSharp.Collections [] let where f x = Microsoft.FSharp.Primitives.Basics.List.filter f x - [] - let groupBy keyf (list: 'T list) = - let dict = new Dictionary,ResizeArray<'T>>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer) + let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (list: 'T list) = + let dict = Dictionary<_,ResizeArray<_>> comparer + + // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying + // for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the + // Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means + // that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway. + let initialBucketSize = 4 // Build the groupings let rec loop list = match list with | v :: t -> - let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf v) - let ok,prev = dict.TryGetValue(key) - if ok then - prev.Add(v) + let key = keyf v + let mutable prev = Unchecked.defaultof<_> + if dict.TryGetValue(key, &prev) then + prev.Add v else - let prev = new ResizeArray<'T>(1) + let prev = ResizeArray initialBucketSize dict.[key] <- prev - prev.Add(v) + prev.Add v loop t | _ -> () loop list // Return the list-of-lists. dict - |> Seq.map (fun group -> (group.Key.Value, Seq.toList group.Value)) + |> Seq.map (fun group -> (getKey group.Key, Seq.toList group.Value)) |> Seq.toList + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let groupByValueType (keyf:'T->'Key) (list:'T list) = groupByImpl HashIdentity.Structural<'Key> keyf id list + + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let groupByRefType (keyf:'T->'Key) (list:'T list) = groupByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf t)) (fun sb -> sb.Value) list + + [] + let groupBy (keyf:'T->'Key) (list:'T list) = +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then groupByValueType keyf list + else groupByRefType keyf list +#else + groupByRefType keyf source +#endif + [] let partition p x = Microsoft.FSharp.Primitives.Basics.List.partition p x From d4b6861c065ab4ca8775a62f28d8446a79c15058 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 05:10:39 +1000 Subject: [PATCH 06/10] Split Array.countBy/groupBy by ValueType/RefType --- src/fsharp/FSharp.Core/array.fs | 70 +++++++++++++++++++++++++-------- src/fsharp/FSharp.Core/list.fs | 4 +- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/fsharp/FSharp.Core/array.fs b/src/fsharp/FSharp.Core/array.fs index 8501a3f19b7..b8c0c42520c 100644 --- a/src/fsharp/FSharp.Core/array.fs +++ b/src/fsharp/FSharp.Core/array.fs @@ -172,24 +172,39 @@ namespace Microsoft.FSharp.Collections Microsoft.FSharp.Primitives.Basics.Array.subUnchecked 0 count array - [] - let countBy projection (array:'T[]) = - checkNonNull "array" array - let dict = new Dictionary,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer) + let countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) = + let dict = Dictionary comparer // Build the groupings for v in array do - let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection v) + let key = projection v let mutable prev = Unchecked.defaultof<_> if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1 let res = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count let mutable i = 0 for group in dict do - res.[i] <- group.Key.Value, group.Value + res.[i] <- getKey group.Key, group.Value i <- i + 1 res + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let countByValueType (projection:'T->'Key) (array:'T[]) = countByImpl HashIdentity.Structural<'Key> projection id array + + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let countByRefType (projection:'T->'Key) (array:'T[]) = countByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection t)) (fun sb -> sb.Value) array + + [] + let countBy (projection:'T->'Key) (array:'T[]) = + checkNonNull "array" array +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then countByValueType projection array + else countByRefType projection array +#else + countByRefType projection array +#endif + [] let append (array1:'T[]) (array2:'T[]) = checkNonNull "array1" array1 @@ -408,32 +423,53 @@ namespace Microsoft.FSharp.Collections let rec loop i = i >= len1 || (f.Invoke(array1.[i], array2.[i]) && loop (i+1)) loop 0 - [] - let groupBy keyf (array: 'T[]) = - checkNonNull "array" array - let dict = new Dictionary,ResizeArray<'T>>(RuntimeHelpers.StructBox<'Key>.Comparer) + let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) = + let dict = Dictionary<_,ResizeArray<_>> comparer + + // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying + // for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the + // Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means + // that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway. + let initialBucketSize = 4 // Build the groupings for i = 0 to (array.Length - 1) do let v = array.[i] - let key = RuntimeHelpers.StructBox (keyf v) - let ok, prev = dict.TryGetValue(key) - if ok then - prev.Add(v) + let key = keyf v + let mutable prev = Unchecked.defaultof<_> + if dict.TryGetValue(key, &prev) then + prev.Add v else - let prev = new ResizeArray<'T>(1) + let prev = ResizeArray initialBucketSize dict.[key] <- prev - prev.Add(v) + prev.Add v // Return the array-of-arrays. let result = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count let mutable i = 0 for group in dict do - result.[i] <- group.Key.Value, group.Value.ToArray() + result.[i] <- getKey group.Key, group.Value.ToArray() i <- i + 1 result + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let groupByValueType (keyf:'T->'Key) (array:'T[]) = groupByImpl HashIdentity.Structural<'Key> keyf id array + + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let groupByRefType (keyf:'T->'Key) (array:'T[]) = groupByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf t)) (fun sb -> sb.Value) array + + [] + let groupBy (keyf:'T->'Key) (array:'T[]) = + checkNonNull "array" array +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then groupByValueType keyf array + else groupByRefType keyf array +#else + groupByRefType keyf array +#endif + [] let pick f (array: _[]) = checkNonNull "array" array diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index 531bea89b4c..5d888107daa 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -70,7 +70,7 @@ namespace Microsoft.FSharp.Collections then countByValueType projection list else countByRefType projection list #else - countByRefType projection source + countByRefType projection list #endif [] @@ -492,7 +492,7 @@ namespace Microsoft.FSharp.Collections then groupByValueType keyf list else groupByRefType keyf list #else - groupByRefType keyf source + groupByRefType keyf list #endif [] From 23cc1564df7b9df4449ee419fd457c9f8f629564 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 05:53:33 +1000 Subject: [PATCH 07/10] Split dict by ValueType/RefType --- src/fsharp/FSharp.Core/array.fs | 4 +- .../FSharp.Core/fslib-extra-pervasives.fs | 46 ++++++++++++------- src/fsharp/FSharp.Core/list.fs | 2 +- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/fsharp/FSharp.Core/array.fs b/src/fsharp/FSharp.Core/array.fs index b8c0c42520c..d059cf574da 100644 --- a/src/fsharp/FSharp.Core/array.fs +++ b/src/fsharp/FSharp.Core/array.fs @@ -172,7 +172,7 @@ namespace Microsoft.FSharp.Collections Microsoft.FSharp.Primitives.Basics.Array.subUnchecked 0 count array - let countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) = + let inline countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) = let dict = Dictionary comparer // Build the groupings @@ -423,7 +423,7 @@ namespace Microsoft.FSharp.Collections let rec loop i = i >= len1 || (f.Invoke(array1.[i], array2.[i]) && loop (i+1)) loop 0 - let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) = + let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) = let dict = Dictionary<_,ResizeArray<_>> comparer // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying diff --git a/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs b/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs index 0fa7edf74e2..94c4b2883cb 100644 --- a/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs +++ b/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs @@ -30,19 +30,16 @@ module ExtraTopLevelOperators = [] let set l = Collections.Set.ofSeq l - [] - let dict l = - // Use a dictionary (this requires hashing and equality on the key type) - // Wrap keys in a StructBox in case they are null (when System.Collections.Generic.Dictionary fails). - let t = new Dictionary,_>(RuntimeHelpers.StructBox<'Key>.Comparer) + let inline dictImpl (comparer:IEqualityComparer<'SafeKey>) (makeSafeKey:'Key->'SafeKey) (getKey:'SafeKey->'Key) (l:seq<'Key*'T>) = + let t = Dictionary comparer for (k,v) in l do - t.[RuntimeHelpers.StructBox(k)] <- v + t.[makeSafeKey k] <- v let d = (t :> IDictionary<_,_>) let c = (t :> ICollection<_>) // Give a read-only view of the dictionary { new IDictionary<'Key, 'T> with member s.Item - with get x = d.[RuntimeHelpers.StructBox(x)] + with get x = d.[makeSafeKey x] and set x v = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) member s.Keys = let keys = d.Keys @@ -50,45 +47,60 @@ module ExtraTopLevelOperators = member s.Add(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); member s.Clear() = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); member s.Remove(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); - member s.Contains(x) = keys.Contains(RuntimeHelpers.StructBox(x)) + member s.Contains(x) = keys.Contains(makeSafeKey x) member s.CopyTo(arr,i) = let mutable n = 0 for k in keys do - arr.[i+n] <- k.Value + arr.[i+n] <- getKey k n <- n + 1 member s.IsReadOnly = true member s.Count = keys.Count interface IEnumerable<'Key> with - member s.GetEnumerator() = (keys |> Seq.map (fun v -> v.Value)).GetEnumerator() + member s.GetEnumerator() = (keys |> Seq.map getKey).GetEnumerator() interface System.Collections.IEnumerable with - member s.GetEnumerator() = ((keys |> Seq.map (fun v -> v.Value)) :> System.Collections.IEnumerable).GetEnumerator() } + member s.GetEnumerator() = ((keys |> Seq.map getKey) :> System.Collections.IEnumerable).GetEnumerator() } member s.Values = d.Values member s.Add(k,v) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) - member s.ContainsKey(k) = d.ContainsKey(RuntimeHelpers.StructBox(k)) + member s.ContainsKey(k) = d.ContainsKey(makeSafeKey k) member s.TryGetValue(k,r) = - let key = RuntimeHelpers.StructBox(k) + let key = makeSafeKey k if d.ContainsKey(key) then (r <- d.[key]; true) else false member s.Remove(k : 'Key) = (raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) : bool) interface ICollection> with member s.Add(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); member s.Clear() = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); member s.Remove(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); - member s.Contains(KeyValue(k,v)) = c.Contains(KeyValuePair<_,_>(RuntimeHelpers.StructBox(k),v)) + member s.Contains(KeyValue(k,v)) = c.Contains(KeyValuePair<_,_>(makeSafeKey k,v)) member s.CopyTo(arr,i) = let mutable n = 0 for (KeyValue(k,v)) in c do - arr.[i+n] <- KeyValuePair<_,_>(k.Value,v) + arr.[i+n] <- KeyValuePair<_,_>(getKey k,v) n <- n + 1 member s.IsReadOnly = true member s.Count = c.Count interface IEnumerable> with member s.GetEnumerator() = - (c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(k.Value,v))).GetEnumerator() + (c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(getKey k,v))).GetEnumerator() interface System.Collections.IEnumerable with member s.GetEnumerator() = - ((c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(k.Value,v))) :> System.Collections.IEnumerable).GetEnumerator() } + ((c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(getKey k,v))) :> System.Collections.IEnumerable).GetEnumerator() } + + // We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance + let dictValueType (l:seq<'Key*'T>) = dictImpl HashIdentity.Structural<'Key> id id l + + // Wrap a StructBox around all keys in case the key type is itself a type using null as a representation + let dictRefType (l:seq<'Key*'T>) = dictImpl RuntimeHelpers.StructBox<'Key>.Comparer (fun k -> RuntimeHelpers.StructBox k) (fun sb -> sb.Value) l + [] + let dict (l:seq<'Key*'T>) = +#if FX_ATLEAST_40 + if typeof<'Key>.IsValueType + then dictValueType l + else dictRefType l +#else + dictRefType l +#endif let getArray (vals : seq<'T>) = match vals with diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index 5d888107daa..20e6370d261 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -449,7 +449,7 @@ namespace Microsoft.FSharp.Collections [] let where f x = Microsoft.FSharp.Primitives.Basics.List.filter f x - let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (list: 'T list) = + let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (list: 'T list) = let dict = Dictionary<_,ResizeArray<_>> comparer // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying From b7884f8409d2708cd26d992bc7b0d77f788a360b Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Wed, 22 Jul 2015 17:12:30 +1000 Subject: [PATCH 08/10] Restored null arg exception as lazy I don't think this is a good way to structure exceptions, but it's to match current functionality --- src/fsharp/FSharp.Core/seq.fs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index d218694e5f1..d4e2f449228 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1445,6 +1445,8 @@ namespace Microsoft.FSharp.Collections mkSeq (fun () -> source.GetEnumerator()) let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (seq:seq<'T>) = + checkNonNull "seq" seq + let dict = Dictionary<_,ResizeArray<_>> comparer // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying @@ -1479,8 +1481,6 @@ namespace Microsoft.FSharp.Collections [] let groupBy (keyf:'T->'Key) (seq:seq<'T>) = - checkNonNull "source" seq - #if FX_ATLEAST_40 if typeof<'Key>.IsValueType then mkDelayedSeq (fun () -> groupByValueType keyf seq) From 3796a552ddb98e8fa23d60616c9689fe8a62c636 Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Thu, 13 Aug 2015 05:45:38 +1000 Subject: [PATCH 09/10] Renamed key to safeKey where appropriate As per https://github.com/Microsoft/visualfsharp/pull/549#discussion-diff-36704422 --- src/fsharp/FSharp.Core/array.fs | 10 +++++----- src/fsharp/FSharp.Core/fslib-extra-pervasives.fs | 4 ++-- src/fsharp/FSharp.Core/list.fs | 10 +++++----- src/fsharp/FSharp.Core/seq.fs | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/fsharp/FSharp.Core/array.fs b/src/fsharp/FSharp.Core/array.fs index d059cf574da..1cd370744b1 100644 --- a/src/fsharp/FSharp.Core/array.fs +++ b/src/fsharp/FSharp.Core/array.fs @@ -177,9 +177,9 @@ namespace Microsoft.FSharp.Collections // Build the groupings for v in array do - let key = projection v + let safeKey = projection v let mutable prev = Unchecked.defaultof<_> - if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1 + if dict.TryGetValue(safeKey, &prev) then dict.[safeKey] <- prev + 1 else dict.[safeKey] <- 1 let res = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count let mutable i = 0 @@ -435,13 +435,13 @@ namespace Microsoft.FSharp.Collections // Build the groupings for i = 0 to (array.Length - 1) do let v = array.[i] - let key = keyf v + let safeKey = keyf v let mutable prev = Unchecked.defaultof<_> - if dict.TryGetValue(key, &prev) then + if dict.TryGetValue(safeKey, &prev) then prev.Add v else let prev = ResizeArray initialBucketSize - dict.[key] <- prev + dict.[safeKey] <- prev prev.Add v // Return the array-of-arrays. diff --git a/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs b/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs index 94c4b2883cb..24ba28531c6 100644 --- a/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs +++ b/src/fsharp/FSharp.Core/fslib-extra-pervasives.fs @@ -64,8 +64,8 @@ module ExtraTopLevelOperators = member s.Add(k,v) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) member s.ContainsKey(k) = d.ContainsKey(makeSafeKey k) member s.TryGetValue(k,r) = - let key = makeSafeKey k - if d.ContainsKey(key) then (r <- d.[key]; true) else false + let safeKey = makeSafeKey k + if d.ContainsKey(safeKey) then (r <- d.[safeKey]; true) else false member s.Remove(k : 'Key) = (raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) : bool) interface ICollection> with member s.Add(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))); diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index 20e6370d261..c68829c30c1 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -47,9 +47,9 @@ namespace Microsoft.FSharp.Collections match srcList with | [] -> () | h::t -> - let key = projection h + let safeKey = projection h let mutable prev = 0 - if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1 + if dict.TryGetValue(safeKey, &prev) then dict.[safeKey] <- prev + 1 else dict.[safeKey] <- 1 loop t loop list let mutable result = [] @@ -462,13 +462,13 @@ namespace Microsoft.FSharp.Collections let rec loop list = match list with | v :: t -> - let key = keyf v + let safeKey = keyf v let mutable prev = Unchecked.defaultof<_> - if dict.TryGetValue(key, &prev) then + if dict.TryGetValue(safeKey, &prev) then prev.Add v else let prev = ResizeArray initialBucketSize - dict.[key] <- prev + dict.[safeKey] <- prev prev.Add v loop t | _ -> () diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index d4e2f449228..25f780da651 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1456,13 +1456,13 @@ namespace Microsoft.FSharp.Collections // Build the groupings seq |> iter (fun v -> - let key = keyf v + let safeKey = keyf v let mutable prev = Unchecked.defaultof<_> - match dict.TryGetValue (key, &prev) with + match dict.TryGetValue (safeKey, &prev) with | true -> prev.Add v | false -> let prev = ResizeArray minimumBucketSize - dict.[key] <- prev + dict.[safeKey] <- prev prev.Add v) // Trim the size of each result group, don't trim very small buckets, as excessive work, and garbage for @@ -1548,11 +1548,11 @@ namespace Microsoft.FSharp.Collections // Build the groupings source |> iter (fun v -> - let key = keyf v + let safeKey = keyf v let mutable prev = Unchecked.defaultof<_> - if dict.TryGetValue(key, &prev) - then dict.[key] <- prev + 1 - else dict.[key] <- 1) + if dict.TryGetValue(safeKey, &prev) + then dict.[safeKey] <- prev + 1 + else dict.[safeKey] <- 1) dict |> map (fun group -> (getKey group.Key, group.Value)) From 037a5e1de652ac1873faddbecf0e89a19b0e869f Mon Sep 17 00:00:00 2001 From: Paul Westcott Date: Thu, 13 Aug 2015 05:50:29 +1000 Subject: [PATCH 10/10] Using default constructor for ResizeArray Initially this had been set to 1, I had changed it to 4, but after discussion it was decided that the default is probably the correct choice. As per https://github.com/Microsoft/visualfsharp/pull/549#issuecomment-129954146 --- src/fsharp/FSharp.Core/array.fs | 8 +------- src/fsharp/FSharp.Core/list.fs | 8 +------- src/fsharp/FSharp.Core/seq.fs | 2 +- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/fsharp/FSharp.Core/array.fs b/src/fsharp/FSharp.Core/array.fs index 1cd370744b1..3d45213dba7 100644 --- a/src/fsharp/FSharp.Core/array.fs +++ b/src/fsharp/FSharp.Core/array.fs @@ -426,12 +426,6 @@ namespace Microsoft.FSharp.Collections let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) = let dict = Dictionary<_,ResizeArray<_>> comparer - // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying - // for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the - // Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means - // that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway. - let initialBucketSize = 4 - // Build the groupings for i = 0 to (array.Length - 1) do let v = array.[i] @@ -440,7 +434,7 @@ namespace Microsoft.FSharp.Collections if dict.TryGetValue(safeKey, &prev) then prev.Add v else - let prev = ResizeArray initialBucketSize + let prev = ResizeArray () dict.[safeKey] <- prev prev.Add v diff --git a/src/fsharp/FSharp.Core/list.fs b/src/fsharp/FSharp.Core/list.fs index c68829c30c1..a24fd283b14 100644 --- a/src/fsharp/FSharp.Core/list.fs +++ b/src/fsharp/FSharp.Core/list.fs @@ -452,12 +452,6 @@ namespace Microsoft.FSharp.Collections let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (list: 'T list) = let dict = Dictionary<_,ResizeArray<_>> comparer - // Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying - // for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the - // Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means - // that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway. - let initialBucketSize = 4 - // Build the groupings let rec loop list = match list with @@ -467,7 +461,7 @@ namespace Microsoft.FSharp.Collections if dict.TryGetValue(safeKey, &prev) then prev.Add v else - let prev = ResizeArray initialBucketSize + let prev = ResizeArray () dict.[safeKey] <- prev prev.Add v loop t diff --git a/src/fsharp/FSharp.Core/seq.fs b/src/fsharp/FSharp.Core/seq.fs index 25f780da651..dc151337e36 100644 --- a/src/fsharp/FSharp.Core/seq.fs +++ b/src/fsharp/FSharp.Core/seq.fs @@ -1461,7 +1461,7 @@ namespace Microsoft.FSharp.Collections match dict.TryGetValue (safeKey, &prev) with | true -> prev.Add v | false -> - let prev = ResizeArray minimumBucketSize + let prev = ResizeArray () dict.[safeKey] <- prev prev.Add v)