Skip to content

Commit

Permalink
add map merge
Browse files Browse the repository at this point in the history
  • Loading branch information
bvenn committed Apr 26, 2023
1 parent 3d6a220 commit 44c8071
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 7 deletions.
53 changes: 52 additions & 1 deletion src/FSharp.Stats/Distributions/Empirical.fs
@@ -1,5 +1,7 @@
namespace FSharp.Stats.Distributions

open FSharp.Stats

/// Represents a probability mass function (map from values to probabilities).
module Empirical =
open System
Expand Down Expand Up @@ -183,15 +185,64 @@ module Empirical =
)
|> Map.ofSeq
|> normalize

/// <summary>Merges two maps into a single map by delegating to Map.mergeBy. If a key exists in both maps, the resulting value is determined by f, which receives the value from histA first and the value from histB second.</summary>
/// <param name="f">Function to combine the two values if a key is present in both maps. `histA-value &#8594; histB-value &#8594; newValue`</param>
/// <param name="histA">Empirical distribution A</param>
/// <param name="histB">Empirical distribution B</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! This function is in general not commutative: because f receives the histA value first, (mergeBy f a b) is not equal to (mergeBy f b a) unless f itself is symmetric.</remarks>
/// <returns>New map that results from merging histA and histB. Values of keys that are present in both maps are handled by f.</returns>
let mergeBy (f: 'value -> 'value -> 'value) (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.mergeBy f histA histB

type EmpiricalDistribution() =
/// <summary>Merges two maps into a single map by delegating to Map.merge. If a key exists in both maps, the value in histA is superseded by the value in histB.</summary>
/// <param name="histA">Empirical distribution A (its values lose against histB on key collisions)</param>
/// <param name="histB">Empirical distribution B (its values win on key collisions)</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! This function is not commutative: (merge a b) is not equal to (merge b a) whenever both maps share a key with differing values.</remarks>
/// <returns>New map that results from merging histA and histB.</returns>
let merge (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.merge histA histB

/// <summary>Merges two maps into a single map by delegating to Map.mergeAdd. If a key exists in both maps, the value from histB is added to the value of histA.</summary>
/// <param name="histA">Empirical distribution A</param>
/// <param name="histB">Empirical distribution B</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! NOTE(review): the original documentation states that (add a b) is not equal to (add b a); for value types with a commutative addition the argument order should not matter - confirm against Map.mergeAdd.</remarks>
/// <returns>New map that results from merging histA and histB. Values of keys that are present in both maps are added.</returns>
let inline add (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.mergeAdd histA histB


type EmpiricalDistribution() =

/// <summary>Creates probability mass function of the input sequence.
/// The bandwidth defines the width of the bins the numbers are sorted into.
/// Bin intervals are half open excluding the upper border: [lower,upper)</summary>
/// <param name="bandwidth">Width of the bins; passed unchanged to Empirical.create</param>
/// <returns>A function that takes the data as seq&lt;float&gt; and returns the result of Empirical.create bandwidth data</returns>
static member create(bandwidth: float) =
fun (data: seq<float>) ->
Empirical.create bandwidth data

///// <summary>Merges two maps into a single map. If a key exists in both maps, the value in histA is superseded by the value in histB.</summary>
///// <param name="histA">Empirical distribution A</param>
///// <param name="histB">Empirical distribution B</param>
///// <remarks>When applied to continuous data the bandwidths must be equal!</remarks>
///// <remarks>This function is not commutative! (merge a b) is not equal to (merge b a)</remarks>
///// <returns>New frequency map that results from merged maps histA and histB.</returns>
//static member merge: ((Map<_,float> -> Map<_,float> -> Map<_,float>)) =
// fun histA histB ->
// Empirical.merge histA histB

///// <summary>Merges two maps into a single map. If a key exists in both maps, the value from mapB is added to the value of mapA.</summary>
///// <param name="histA">Empirical distribution A</param>
///// <param name="histB">Empirical distribution B</param>
///// <remarks>When applied to continuous data the bandwidths must be equal!</remarks>
///// <remarks>This function is not commutative! (add a b) is not equal to (add b a)</remarks>
///// <returns>New frequency map that results from merged maps histA and histB. Values from keys that are present in both maps are handled by f</returns>
//static member add: ((Map<_,float> -> Map<_,float> -> Map<_,float>)) =
// fun histA histB ->
// Empirical.add histA histB

/// Creates probability mass function of the categories in the input sequence.
/// A template defines the search space to exclude certain elements or to include elements that are not in the input sequence.
Expand Down
31 changes: 25 additions & 6 deletions src/FSharp.Stats/Distributions/Frequency.fs
Expand Up @@ -2,6 +2,7 @@ namespace FSharp.Stats.Distributions

/// Represents a histogram (map from values to integer frequencies).
module Frequency =
open FSharp.Stats

/// Given the list [a,b,a,c,b,b], produce a map {a:2, b:3, c:1} which contains the count of each unique item in the list
let createGeneric list =
Expand Down Expand Up @@ -66,12 +67,30 @@ module Frequency =
| [] -> true
issubset (histA |> Map.toList) histB

///// Subtracts the values histogramA from histogramB
//let subtract (histA:Map<'a,int>) (histB:Map<'a,int>) =
// Map.merge histA histB (fun k (v, v') -> v - v')

////// Adds the values in histogramA to histogramB
//let add (histA:Map<'a,int>) (histB:Map<'a,int>) =
// Map.merge histA histB (fun k (v, v') -> v + v')
/// <summary>Merges two maps into a single map by delegating to Map.merge. If a key exists in both maps, the value in histA is superseded by the value in histB.</summary>
/// <param name="histA">Frequency map A (its values lose against histB on key collisions)</param>
/// <param name="histB">Frequency map B (its values win on key collisions)</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! This function is not commutative: (merge a b) is not equal to (merge b a) whenever both maps share a key with differing values.</remarks>
/// <returns>New frequency map that results from merging histA and histB.</returns>
let merge (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.merge histA histB

/// <summary>Merges two maps into a single map by delegating to Map.mergeSubtract. If a key exists in both maps, the value from histB is subtracted from the value of histA.</summary>
/// <param name="histA">Frequency map A (minuend)</param>
/// <param name="histB">Frequency map B (subtrahend)</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! This function is not commutative: (subtract a b) is not equal to (subtract b a). NOTE(review): how keys that are present in only one of the maps are treated is defined by Map.mergeSubtract - confirm there.</remarks>
/// <returns>New frequency map that results from merging histA and histB. Values of keys that are present in both maps are the histA value minus the histB value.</returns>
let inline subtract (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.mergeSubtract histA histB

/// <summary>Merges two maps into a single map by delegating to Map.mergeAdd. If a key exists in both maps, the value from histB is added to the value of histA.</summary>
/// <param name="histA">Frequency map A</param>
/// <param name="histB">Frequency map B</param>
/// <remarks>When applied to continuous data the bandwidths must be equal! NOTE(review): the original documentation states that (add a b) is not equal to (add b a); for value types with a commutative addition the argument order should not matter - confirm against Map.mergeAdd.</remarks>
/// <returns>New frequency map that results from merging histA and histB. Values of keys that are present in both maps are added.</returns>
let inline add (histA: Map<_,'value>) (histB: Map<_,'value>) =
Map.mergeAdd histA histB

1 change: 1 addition & 0 deletions src/FSharp.Stats/FSharp.Stats.fsproj
Expand Up @@ -45,6 +45,7 @@
<Compile Include="Seq.fs" />
<Compile Include="Array.fs" />
<Compile Include="List.fs" />
<Compile Include="Map.fs" />
<Compile Include="JaggedArray.fs" />
<Compile Include="Vector.fs" />
<Compile Include="RowVector.fs" />
Expand Down

0 comments on commit 44c8071

Please sign in to comment.