Skip to content

Commit

Permalink
Split file
Browse files Browse the repository at this point in the history
  • Loading branch information
ehmicky committed Nov 14, 2021
1 parent fb842f0 commit d130543
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 60 deletions.
61 changes: 2 additions & 59 deletions src/stats/env_dev/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { getChiSquaredValue } from '../critical_values/chi_squared.js'
import { getMean, getSum } from '../sum.js'
import { getVariance } from '../variance.js'

import { getGroupsCount, getClusterSizes } from './size.js'

export const getEnvDev = function (
array,
{
Expand Down Expand Up @@ -38,65 +40,6 @@ const returnTrue = function () {
return true
}

// Compute how many groups should be evaluated for an `array` of `length`
// elements.
// Every group splits the elements into clusters, and the clusters of each
// group hold `CLUSTER_FACTOR` times more elements than those of the previous
// group.
// The result is the floored logarithm, in base `CLUSTER_FACTOR`, of
// `length / MIN_GROUP_SIZE`.
const getGroupsCount = function (length) {
  const sizeRatioLog = Math.log(length / MIN_GROUP_SIZE)
  const factorLog = Math.log(CLUSTER_FACTOR)
  return Math.floor(sizeRatioLog / factorLog)
}

// List the `clusterSize` of every group, i.e. how many elements each cluster
// of that group contains.
// The returned array has one entry per group, in group order.
const getClusterSizes = function (groupsCount) {
  const groups = { length: groupsCount }
  return Array.from(groups, (value, groupIndex) =>
    getClusterSize(value, groupIndex),
  )
}

// `Array.from()` mapper returning the `clusterSize` of the group at `index`.
// The first group (index 0) uses `CLUSTER_FACTOR` elements per cluster, and
// each following group multiplies that amount by `CLUSTER_FACTOR` again.
const getClusterSize = function (_, index) {
  const exponent = index + 1
  return CLUSTER_FACTOR ** exponent
}

// Minimum `groupSize`.
// A higher value lowers accuracy:
// - The resulting `envDev` will be lower than the real value
// - This is because more `array` elements are required to reach the "optimal"
// size.
// - This means multiplying this constant by `n` requires running the benchmark
// `n` times longer to get the same `envDev`
// A lower value lowers precision:
// - This is because groups with a lower `groupSize` are less precise
// - This is especially visible in preview mode, particularly when a new group
// is added
// - This is because the last groups are less precise.
// - Also, new groups generally have a higher `varianceRatio` if the "optimal"
// size has not been reached yet, so each new group will make `envDev`
// increase until it reaches its optimal value.
// In general, `envDev` tends to be too low, so we favor accuracy over
// precision.
// However, this does mean `envDev` tends to vary quite a lot between different
// `array`s.
const MIN_GROUP_SIZE = 2

// Each group has `CLUSTER_FACTOR` times more elements per cluster than the
// previous one.
// A lower value:
// - Is slower to compute
// - Using `CLUSTER_FACTOR ** n` divides the time complexity by `sqrt(n)`
// - Leads to an overall slightly worse accuracy
// A higher value:
// - Leads to much poorer accuracy and precision when the "optimal" size is
// close to the `array.length`
// - Specifically when that "optimal" size is higher than
// `array.length` / (CLUSTER_FACTOR ** 2)
// We must also ensure that `CLUSTER_FACTOR ** MAX_ARGUMENTS >= MAX_SAMPLES`
// - MAX_ARGUMENTS is the maximum number of arguments to Math.max(): 123182
// - MAX_SAMPLES is the maximum number of array elements: 123182
// - Otherwise, `Math.max(...groups)` would crash
// Using an integer >= 2 allows several performance optimizations in the
// implementation
export const CLUSTER_FACTOR = 2

// For each group, slice `array` into several clusters containing exactly
// `clusterSize` elements. Each group has a specific `clusterSize`.
// Then, sum the elements of each cluster and compute the variance of all
Expand Down
58 changes: 58 additions & 0 deletions src/stats/env_dev/size.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Compute how many groups should be evaluated for an `array` of `length`
// elements.
// Every group splits the elements into clusters, and the clusters of each
// group hold `CLUSTER_FACTOR` times more elements than those of the previous
// group.
// The result is the floored logarithm, in base `CLUSTER_FACTOR`, of
// `length / MIN_GROUP_SIZE`.
export const getGroupsCount = function (length) {
  const sizeRatioLog = Math.log(length / MIN_GROUP_SIZE)
  const factorLog = Math.log(CLUSTER_FACTOR)
  return Math.floor(sizeRatioLog / factorLog)
}

// List the `clusterSize` of every group, i.e. how many elements each cluster
// of that group contains.
// The returned array has one entry per group, in group order.
export const getClusterSizes = function (groupsCount) {
  const groups = { length: groupsCount }
  return Array.from(groups, (value, groupIndex) =>
    getClusterSize(value, groupIndex),
  )
}

// `Array.from()` mapper returning the `clusterSize` of the group at `index`.
// The first group (index 0) uses `CLUSTER_FACTOR` elements per cluster, and
// each following group multiplies that amount by `CLUSTER_FACTOR` again.
const getClusterSize = function (_, index) {
  const exponent = index + 1
  return CLUSTER_FACTOR ** exponent
}

// Minimum `groupSize`.
// A higher value lowers accuracy:
// - The resulting `envDev` will be lower than the real value
// - This is because more `array` elements are required to reach the "optimal"
// size.
// - This means multiplying this constant by `n` requires running the benchmark
// `n` times longer to get the same `envDev`
// A lower value lowers precision:
// - This is because groups with a lower `groupSize` are less precise
// - This is especially visible in preview mode, particularly when a new group
// is added
// - This is because the last groups are less precise.
// - Also, new groups generally have a higher `varianceRatio` if the "optimal"
// size has not been reached yet, so each new group will make `envDev`
// increase until it reaches its optimal value.
// In general, `envDev` tends to be too low, so we favor accuracy over
// precision.
// However, this does mean `envDev` tends to vary quite a lot between different
// `array`s.
const MIN_GROUP_SIZE = 2

// Each group has `CLUSTER_FACTOR` times more elements per cluster than the
// previous one.
// A lower value:
// - Is slower to compute
// - Using `CLUSTER_FACTOR ** n` divides the time complexity by `sqrt(n)`
// - Leads to an overall slightly worse accuracy
// A higher value:
// - Leads to much poorer accuracy and precision when the "optimal" size is
// close to the `array.length`
// - Specifically when that "optimal" size is higher than
// `array.length` / (CLUSTER_FACTOR ** 2)
// We must also ensure that `CLUSTER_FACTOR ** MAX_ARGUMENTS >= MAX_SAMPLES`
// - MAX_ARGUMENTS is the maximum number of arguments to Math.max(): 123182
// - MAX_SAMPLES is the maximum number of array elements: 123182
// - Otherwise, `Math.max(...groups)` would crash
// Using an integer >= 2 allows several performance optimizations in the
// implementation
export const CLUSTER_FACTOR = 2
3 changes: 2 additions & 1 deletion src/stats/env_dev/try.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import { getQuantile } from '../quantile.js'
import { getMean } from '../sum.js'
import { getVariance } from '../variance.js'

import { getEnvDev, CLUSTER_FACTOR } from './main.js'
import { getEnvDev } from './main.js'
import { getSamples } from './samples.js'
import { CLUSTER_FACTOR } from './size.js'

const sortNumbers = function (numA, numB) {
return numA - numB
Expand Down

0 comments on commit d130543

Please sign in to comment.