Skip to content

Commit

Permalink
Merge 9b22014 into 5a5f04d
Browse files Browse the repository at this point in the history
  • Loading branch information
PragTob committed Jul 4, 2019
2 parents 5a5f04d + 9b22014 commit e71fe80
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 40 deletions.
143 changes: 112 additions & 31 deletions lib/statistex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ defmodule Statistex do
defstruct [
:total,
:average,
:variance,
:standard_deviation,
:standard_deviation_ratio,
:median,
:percentiles,
:frequency_distribution,
:mode,
:minimum,
:maximum,
Expand All @@ -36,10 +38,12 @@ defmodule Statistex do
@type t :: %__MODULE__{
total: number,
average: float,
variance: float,
standard_deviation: float,
standard_deviation_ratio: float,
median: number,
percentiles: percentiles,
frequency_distribution: %{sample => pos_integer},
mode: mode,
minimum: number,
maximum: number,
Expand Down Expand Up @@ -92,10 +96,18 @@ defmodule Statistex do
iex> Statistex.statistics([200, 400, 400, 400, 500, 500, 500, 700, 900])
%Statistex{
average: 500.0,
variance: 40_000.0,
standard_deviation: 200.0,
standard_deviation_ratio: 0.4,
median: 500.0,
percentiles: %{50 => 500.0},
frequency_distribution: %{
200 => 1,
400 => 3,
500 => 3,
700 => 1,
900 => 1
},
mode: [500, 400],
minimum: 200,
maximum: 900,
Expand All @@ -109,10 +121,12 @@ defmodule Statistex do
iex> Statistex.statistics([0, 0, 0, 0])
%Statistex{
average: 0.0,
variance: 0.0,
standard_deviation: 0.0,
standard_deviation_ratio: 0.0,
median: 0.0,
percentiles: %{50 => 0.0},
frequency_distribution: %{0 => 4},
mode: 0,
minimum: 0,
maximum: 0,
Expand All @@ -132,26 +146,26 @@ defmodule Statistex do
total = total(samples)
sample_size = length(samples)
average = average(samples, total: total, sample_size: sample_size)
standard_deviation = standard_deviation(samples, average: average, sample_size: sample_size)
variance = variance(samples, average: average, sample_size: sample_size)
standard_deviation = standard_deviation(samples, variance: variance)

standard_deviation_ratio =
standard_deviation_ratio(
samples,
average: average,
standard_deviation: standard_deviation
)
standard_deviation_ratio(samples, standard_deviation: standard_deviation)

percentiles = calculate_percentiles(samples, configuration)
median = median(samples, percentiles: percentiles)

frequency_distribution = frequency_distribution(samples)

%__MODULE__{
total: total,
average: average,
variance: variance,
standard_deviation: standard_deviation,
standard_deviation_ratio: standard_deviation_ratio,
median: median,
median: median(samples, percentiles: percentiles),
percentiles: percentiles,
mode: mode(samples),
frequency_distribution: frequency_distribution,
mode: mode(samples, frequency_distribution: frequency_distribution),
minimum: minimum(samples),
maximum: maximum(samples),
sample_size: sample_size
Expand Down Expand Up @@ -243,9 +257,9 @@ defmodule Statistex do
end

@doc """
Calculate the standard deviation.
Calculate the variance.
A measurement how much samples vary (the higher the more the samples vary).
A measure of how much the samples vary (the higher, the more the samples vary). This is the variance of a sample, so the sum of squared deviations is divided by `sample_size - 1` (Bessel's correction).
## Options
If already calculated, the `:average` and `:sample_size` options can be provided to avoid recalculating those values.
Expand All @@ -254,43 +268,78 @@ defmodule Statistex do
## Examples
iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12])
4.0
iex> Statistex.variance([4, 9, 11, 12, 17, 5, 8, 12, 12])
16.0
iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12], sample_size: 9, average: 10.0)
4.0
iex> Statistex.variance([4, 9, 11, 12, 17, 5, 8, 12, 12], sample_size: 9, average: 10.0)
16.0
iex> Statistex.standard_deviation([42])
iex> Statistex.variance([42])
0.0
iex> Statistex.standard_deviation([1, 1, 1, 1, 1, 1, 1])
iex> Statistex.variance([1, 1, 1, 1, 1, 1, 1])
0.0
iex> Statistex.standard_deviation([])
iex> Statistex.variance([])
** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number.
"""
@spec standard_deviation(samples, keyword) :: float
def standard_deviation(samples, options \\ [])
def standard_deviation([], _), do: raise(ArgumentError, @empty_list_error_message)
@spec variance(samples, keyword) :: float
def variance(samples, options \\ [])
def variance([], _), do: raise(ArgumentError, @empty_list_error_message)

def standard_deviation(samples, options) do
def variance(samples, options) do
sample_size = Keyword.get_lazy(options, :sample_size, fn -> sample_size(samples) end)

average =
Keyword.get_lazy(options, :average, fn -> average(samples, sample_size: sample_size) end)

do_standard_deviation(samples, average, sample_size)
do_variance(samples, average, sample_size)
end

defp do_standard_deviation(_samples, _average, 1), do: 0.0
defp do_variance(_samples, _average, 1), do: 0.0

defp do_standard_deviation(samples, average, sample_size) do
defp do_variance(samples, average, sample_size) do
total_variance =
Enum.reduce(samples, 0, fn sample, total ->
total + :math.pow(sample - average, 2)
end)

variance = total_variance / (sample_size - 1)
total_variance / (sample_size - 1)
end

@doc """
Calculate the standard deviation.
A measure of how much the samples vary (the higher, the more the samples vary). It's the square root of the variance. Unlike the variance, its unit is the same as that of the samples (as calculating the variance includes squaring).
## Options
If already calculated, the `:variance` option can be provided to avoid recalculating it.
`ArgumentError` is raised if the given list is empty.
## Examples
iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12])
4.0
iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12], variance: 16.0)
4.0
iex> Statistex.standard_deviation([42])
0.0
iex> Statistex.standard_deviation([1, 1, 1, 1, 1, 1, 1])
0.0
iex> Statistex.standard_deviation([])
** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number.
"""
@spec standard_deviation(samples, keyword) :: float
def standard_deviation(samples, options \\ [])
def standard_deviation([], _), do: raise(ArgumentError, @empty_list_error_message)

def standard_deviation(samples, options) do
  # Use the caller-supplied :variance when present; otherwise compute it lazily
  # from the samples. The standard deviation is the square root of that value.
  options
  |> Keyword.get_lazy(:variance, fn -> variance(samples) end)
  |> :math.sqrt()
end

Expand All @@ -299,13 +348,13 @@ defmodule Statistex do
This helps put the absolute standard deviation value into perspective by expressing it relative to the average. It's what percentage of the absolute value of the average the standard deviation takes.
`ArgumentError` is raised if the given list is empty.
## Options
If already calculated, the `:average` and `:standard_deviation` options can be provided to avoid recalculating those values.
If both values are provided, the provided samples will be ignored.
`ArgumentError` is raised if the given list is empty.
## Examples
iex> Statistex.standard_deviation_ratio([4, 9, 11, 12, 17, 5, 8, 12, 12])
Expand Down Expand Up @@ -401,13 +450,44 @@ defmodule Statistex do
defdelegate(percentiles(samples, percentiles), to: Percentile)

@doc """
Calculates the mode of the given samples.
A map showing how often each sample occurs in the samples.
Goes from a concrete occurrence of the sample to the number of times it was observed in the samples.
Mode is the sample(s) that occur the most. Often one value, but can be multiple values if they occur the same amount of times. If no value occurs at least twice, this value will be nil.
## Examples
iex> Statistex.frequency_distribution([1, 2, 4.23, 7, 2, 99])
%{
2 => 2,
1 => 1,
4.23 => 1,
7 => 1,
99 => 1
}
iex> Statistex.frequency_distribution([])
** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number.
"""
@spec frequency_distribution(samples) :: %{required(sample) => pos_integer}
def frequency_distribution([]), do: raise(ArgumentError, @empty_list_error_message)

def frequency_distribution(samples) do
  # Fold over the samples, starting each newly seen sample at a count of 1 and
  # bumping the count of every sample that is already in the map.
  tally = fn sample, seen -> Map.update(seen, sample, 1, &(&1 + 1)) end

  Enum.reduce(samples, %{}, tally)
end

@doc """
Calculates the mode of the given samples.
Mode is the sample(s) that occur the most. Often one value, but can be multiple values if they occur the same number of times. If no value occurs at least twice, there is no mode and it hence returns `nil`.
`ArgumentError` is raised if the given list is empty.
## Options
If already calculated, the `:frequency_distribution` option can be provided to avoid recalculating it.
## Examples
iex> Statistex.mode([5, 3, 4, 5, 1, 3, 1, 3])
Expand All @@ -423,8 +503,9 @@ defmodule Statistex do
iex> Enum.sort(mode)
[1, 3, 5]
"""
@spec mode(samples) :: mode
@spec mode(samples, keyword) :: mode
defdelegate mode(samples), to: Mode
defdelegate mode(samples, opts), to: Mode

@doc """
Calculates the median of the given samples.
Expand Down
22 changes: 13 additions & 9 deletions lib/statistex/mode.ex
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
defmodule Statistex.Mode do
@moduledoc false

@spec mode(Statistex.samples()) :: Statistex.mode()
def mode([]) do
import Statistex

@spec mode(Statistex.samples(), keyword) :: Statistex.mode()
def mode(samples, opts \\ [])

def mode([], _) do
raise(
ArgumentError,
"Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number."
)
end

def mode(samples) do
samples
|> Enum.reduce(%{}, fn sample, counts ->
Map.update(counts, sample, 1, fn old_value -> old_value + 1 end)
end)
|> max_multiple
|> decide_mode
def mode(samples, opts) do
  # Reuse a precomputed :frequency_distribution when the caller passes one;
  # otherwise build it from the samples, then hand it to the private helpers.
  opts
  |> Keyword.get_lazy(:frequency_distribution, fn -> frequency_distribution(samples) end)
  |> max_multiple()
  |> decide_mode()
end

defp max_multiple(map) do
Expand Down
25 changes: 25 additions & 0 deletions test/statistex_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ defmodule Statistex.StatistexTest do

assert stats.median == stats.percentiles[50]

assert stats.variance >= 0
assert stats.standard_deviation >= 0
assert stats.standard_deviation_ratio >= 0

Expand All @@ -58,6 +59,30 @@ defmodule Statistex.StatistexTest do
mode ->
assert mode in samples
end

frequency_distribution = stats.frequency_distribution
frequency_entry_count = map_size(frequency_distribution)

assert frequency_entry_count >= 1
assert frequency_entry_count <= stats.sample_size

# frequencies actually occur in samples
Enum.each(frequency_distribution, fn {key, value} ->
assert key in samples
assert value >= 1
assert is_integer(value)
end)

# all samples are in frequencies
Enum.each(samples, fn sample -> assert Map.has_key?(frequency_distribution, sample) end)

# counts sum up to sample_size
count_sum =
frequency_distribution
|> Map.values()
|> Enum.sum()

assert count_sum == stats.sample_size
end

defp big_list_big_floats do
Expand Down

0 comments on commit e71fe80

Please sign in to comment.