Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Alternative Array.Filter implementation #1479

Merged
merged 3 commits into from
Aug 30, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,64 @@ type ArrayModule() =
let nullArr = null:string[]
CheckThrowsArgumentNullException (fun () -> Array.filter funcStr nullArr |> ignore)

()
()

[<Test>]
member this.Filter2 () =
// The Array.filter algorith uses a bitmask as a temporary storage mechanism
// for which elements to filter. This introduces some possible error conditions
// around how the filter is filled and subsequently used, so filter test
// does a pretty exhaustive test suite.
// It works by first generating arrays which consist of sequences of unique
// positive and negative numbers, as per arguments, it then filters for the
// positive values, and then compares the results agains the original array.

let makeTestArray size posLength negLength startWithPos startFromEnd =
let array = Array.zeroCreate size

let mutable sign = if startWithPos then 1 else -1
let mutable count = if startWithPos then posLength else negLength
for i = 1 to size do
let idx = if startFromEnd then size-i else i-1
array.[idx] <- (idx+1) * sign
count <- count - 1
if count <= 0 then
sign <- sign * -1
count <- if sign > 0 then posLength else negLength

array

let checkFilter filter (array:array<_>) =
let filtered = array |> filter (fun n -> n > 0)

let mutable idx = 0
for item in filtered do
while array.[idx] < item do
idx <- idx + 1
if item <> array.[idx] then
Assert.Fail ()
idx <- idx + 1
while idx < array.Length do
if array.[idx] > 0 then
Assert.Fail ()
idx <- idx + 1

let checkCombinations filter maxSize =
for size = 0 to maxSize do
for posLength = 1 to size do
for negLength = 1 to size do
for startWithPos in [true; false] do
for startFromEnd in [true; false] do
let testArray = makeTestArray size posLength negLength startWithPos startFromEnd
checkFilter filter testArray

// this could probably be a bit smaller, but needs to at least be > 64 to test chunk copying
// of data, and > 96 gives a safer feel, so settle on a nice decimal rounding of one hundred
// to appease those with digits.
let suitableTestMaxLength = 100

checkCombinations Array.filter suitableTestMaxLength



[<Test>]
Expand Down
198 changes: 162 additions & 36 deletions src/fsharp/FSharp.Core/array.fs
Original file line number Diff line number Diff line change
Expand Up @@ -518,44 +518,170 @@ namespace Microsoft.FSharp.Collections
else
empty

// The filter module is a space and performance for Array.filter based optimization that uses
// a bitarray to store the results of the filtering of every element of the array. This means
// that the only additional temporary garbage that needs to be allocated is {array.Length/8} bytes.
//
// Other optimizations include:
// - arrays < 32 elements don't allocate any garbage at all
// - when the predicate yields consecutive runs of true data that is >= 32 elements (and fall
// into maskArray buckets) are copied in chunks using System.Array.Copy
module Filter =
let private populateMask<'a> (f:'a->bool) (src:array<'a>) (maskArray:array<uint32>) =
let mutable count = 0
for maskIdx = 0 to maskArray.Length-1 do
let srcIdx = maskIdx * 32
let mutable mask = 0u
if f src.[srcIdx+0x00] then mask <- mask ||| (1u <<< 0x00); count <- count + 1
if f src.[srcIdx+0x01] then mask <- mask ||| (1u <<< 0x01); count <- count + 1
if f src.[srcIdx+0x02] then mask <- mask ||| (1u <<< 0x02); count <- count + 1
if f src.[srcIdx+0x03] then mask <- mask ||| (1u <<< 0x03); count <- count + 1
if f src.[srcIdx+0x04] then mask <- mask ||| (1u <<< 0x04); count <- count + 1
if f src.[srcIdx+0x05] then mask <- mask ||| (1u <<< 0x05); count <- count + 1
if f src.[srcIdx+0x06] then mask <- mask ||| (1u <<< 0x06); count <- count + 1
if f src.[srcIdx+0x07] then mask <- mask ||| (1u <<< 0x07); count <- count + 1
if f src.[srcIdx+0x08] then mask <- mask ||| (1u <<< 0x08); count <- count + 1
if f src.[srcIdx+0x09] then mask <- mask ||| (1u <<< 0x09); count <- count + 1
if f src.[srcIdx+0x0A] then mask <- mask ||| (1u <<< 0x0A); count <- count + 1
if f src.[srcIdx+0x0B] then mask <- mask ||| (1u <<< 0x0B); count <- count + 1
if f src.[srcIdx+0x0C] then mask <- mask ||| (1u <<< 0x0C); count <- count + 1
if f src.[srcIdx+0x0D] then mask <- mask ||| (1u <<< 0x0D); count <- count + 1
if f src.[srcIdx+0x0E] then mask <- mask ||| (1u <<< 0x0E); count <- count + 1
if f src.[srcIdx+0x0F] then mask <- mask ||| (1u <<< 0x0F); count <- count + 1
if f src.[srcIdx+0x10] then mask <- mask ||| (1u <<< 0x10); count <- count + 1
if f src.[srcIdx+0x11] then mask <- mask ||| (1u <<< 0x11); count <- count + 1
if f src.[srcIdx+0x12] then mask <- mask ||| (1u <<< 0x12); count <- count + 1
if f src.[srcIdx+0x13] then mask <- mask ||| (1u <<< 0x13); count <- count + 1
if f src.[srcIdx+0x14] then mask <- mask ||| (1u <<< 0x14); count <- count + 1
if f src.[srcIdx+0x15] then mask <- mask ||| (1u <<< 0x15); count <- count + 1
if f src.[srcIdx+0x16] then mask <- mask ||| (1u <<< 0x16); count <- count + 1
if f src.[srcIdx+0x17] then mask <- mask ||| (1u <<< 0x17); count <- count + 1
if f src.[srcIdx+0x18] then mask <- mask ||| (1u <<< 0x18); count <- count + 1
if f src.[srcIdx+0x19] then mask <- mask ||| (1u <<< 0x19); count <- count + 1
if f src.[srcIdx+0x1A] then mask <- mask ||| (1u <<< 0x1A); count <- count + 1
if f src.[srcIdx+0x1B] then mask <- mask ||| (1u <<< 0x1B); count <- count + 1
if f src.[srcIdx+0x1C] then mask <- mask ||| (1u <<< 0x1C); count <- count + 1
if f src.[srcIdx+0x1D] then mask <- mask ||| (1u <<< 0x1D); count <- count + 1
if f src.[srcIdx+0x1E] then mask <- mask ||| (1u <<< 0x1E); count <- count + 1
if f src.[srcIdx+0x1F] then mask <- mask ||| (1u <<< 0x1F); count <- count + 1
maskArray.[maskIdx] <- mask
count

let private createMask<'a> (f:'a->bool) (src:array<'a>) (maskArrayOut:byref<array<uint32>>) (leftoverMaskOut:byref<uint32>) =
let maskArrayLength = src.Length / 0x20

// null when there are less than 32 items in src array.
let maskArray =
if maskArrayLength = 0 then Unchecked.defaultof<_>
else Array.zeroCreateUnchecked<uint32> maskArrayLength

let mutable count =
match maskArray with
| null -> 0
| maskArray -> populateMask f src maskArray

let leftoverMask =
match src.Length % 0x20 with
| 0 -> 0u
| _ ->
let mutable mask = 0u
let mutable elementMask = 1u
for arrayIdx = maskArrayLength*0x20 to src.Length-1 do
if f src.[arrayIdx] then mask <- mask ||| elementMask; count <- count + 1
elementMask <- elementMask <<< 1
mask

maskArrayOut <- maskArray
leftoverMaskOut <- leftoverMask
count

let private populateDstViaMask<'a> (src:array<'a>) (maskArray:array<uint32>) (dst:array<'a>) =
let mutable dstIdx = 0
let mutable batchCount = 0
for maskIdx = 0 to maskArray.Length-1 do
let mask = maskArray.[maskIdx]
if mask = 0xFFFFFFFFu then
batchCount <- batchCount + 1
else
let srcIdx = maskIdx * 0x20

if batchCount <> 0 then
let batchSize = batchCount * 0x20
System.Array.Copy (src, srcIdx-batchSize, dst, dstIdx, batchSize)
dstIdx <- dstIdx + batchSize
batchCount <- 0

if mask <> 0u then
if mask &&& (1u <<< 0x00) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x00]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x01) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x01]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x02) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x02]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x03) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x03]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x04) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x04]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x05) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x05]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x06) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x06]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x07) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x07]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x08) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x08]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x09) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x09]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0A) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0A]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0B) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0B]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0C) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0C]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0D) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0D]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0E) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0E]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x0F) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x0F]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x10) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x10]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x11) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x11]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x12) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x12]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x13) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x13]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x14) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x14]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x15) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x15]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x16) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x16]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x17) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x17]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x18) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x18]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x19) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x19]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1A) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1A]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1B) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1B]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1C) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1C]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1D) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1D]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1E) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1E]; dstIdx <- dstIdx + 1
if mask &&& (1u <<< 0x1F) <> 0u then dst.[dstIdx] <- src.[srcIdx+0x1F]; dstIdx <- dstIdx + 1

if batchCount <> 0 then
let srcIdx = maskArray.Length * 0x20
let batchSize = batchCount * 0x20
System.Array.Copy (src, srcIdx-batchSize, dst, dstIdx, batchSize)
dstIdx <- dstIdx + batchSize

dstIdx

let private filterViaMask (maskArray:array<uint32>) (leftoverMask:uint32) (count:int) (src:array<_>) =
let dst = Array.zeroCreateUnchecked count

let mutable dstIdx = 0
let srcIdx =
match maskArray with
| null -> 0
| _ ->
dstIdx <- populateDstViaMask src maskArray dst
maskArray.Length*0x20

let mutable elementMask = 1u
for srcIdx = srcIdx to src.Length-1 do
if leftoverMask &&& elementMask <> 0u then dst.[dstIdx] <- src.[srcIdx]; dstIdx <- dstIdx + 1
elementMask <- elementMask <<< 1

dst

let filter f (src:array<_>) =
let mutable maskArray = Unchecked.defaultof<_>
let mutable leftOverMask = Unchecked.defaultof<_>
match createMask f src &maskArray &leftOverMask with
| 0 -> empty
| count -> filterViaMask maskArray leftOverMask count src

[<CompiledName("Filter")>]
let filter f (array: _[]) =
checkNonNull "array" array
let mutable i = 0
while i < array.Length && not (f array.[i]) do
i <- i + 1

if i <> array.Length then
let mutable element = array.[i]
let chunk1 : 'T[] = Array.zeroCreateUnchecked (((array.Length-i) >>> 2) + 1)
let mutable count = 1
chunk1.[0] <- element
i <- i + 1
while count < chunk1.Length && i < array.Length do
element <- array.[i]
if f element then
chunk1.[count] <- element
count <- count + 1
i <- i + 1

if i < array.Length then
let chunk2 = Array.zeroCreateUnchecked (array.Length-i)
count <- 0
while i < array.Length do
element <- array.[i]
if f element then
chunk2.[count] <- element
count <- count + 1
i <- i + 1

let res : 'T[] = Array.zeroCreateUnchecked (chunk1.Length + count)
Array.Copy(chunk1,res,chunk1.Length)
Array.Copy(chunk2,0,res,chunk1.Length,count)
res
else
Array.subUnchecked 0 count chunk1
else empty

checkNonNull "array" array
Filter.filter f array

[<CompiledName("Where")>]
let where f (array: _[]) = filter f array
Expand Down