# Testing different schemes for encoding ids

## Imports

In [1]:
const convertHrtime = require('convert-hrtime');

In [2]:
const d3array = require('d3-array')

## Timing code

In [3]:
/**
 * Measure the mean duration of a function call.
 *
 * @param n - Number of times to invoke `f`.
 * @param f - The function to time; it is invoked with `null` as `this`.
 * @param args - Positional arguments passed to `f` on every invocation.
 * @returns The mean duration of one invocation in milliseconds. The sum is
 *   divided by `n`, so the result is `NaN` when `n` is 0.
 */
export function timeit(n: number, f: any, args: any[]): number {
    let sum = 0.0;
    // The counter and both timestamps are block-scoped so that nothing leaks
    // into (or depends on) the global object.
    for (let i = 0; i < n; i++) {
        const start = process.hrtime();
        f.apply(null, args);
        // Passing the earlier reading back to hrtime yields the elapsed time.
        const end = process.hrtime(start);
        sum += convertHrtime(end).milliseconds;
    }
    return sum / n;
}

[Function: timeit]

## ID generation functions

In [33]:
/**
 * Helpers for packing and unpacking id triplets.
 *
 * A triplet is a string of eight UTF-16 code units: three 16-bit words for
 * the path, three for the version and two for the store id, each stored most
 * significant word first.
 */
namespace Private {
  /** Mask that keeps the low 16 bits of a value. */
  const LOW_WORD = 0xFFFF;

  /** Break a value into three 16-bit words, most significant first. */
  function splitThreeWords(value: number): [number, number, number] {
    const low = value & LOW_WORD;
    const mid = (((value - low) / 0x10000) | 0) & LOW_WORD;
    const high = (((value - mid - low) / 0x100000000) | 0) & LOW_WORD;
    return [high, mid, low];
  }

  /** Break a value into two 16-bit words, most significant first. */
  function splitTwoWords(value: number): [number, number] {
    const low = value & LOW_WORD;
    const high = (((value - low) / 0x10000) | 0) & LOW_WORD;
    return [high, low];
  }

  /** Recombine the three code units starting at `offset` into one number. */
  function joinThreeWords(id: string, offset: number): number {
    const high = id.charCodeAt(offset);
    const mid = id.charCodeAt(offset + 1);
    const low = id.charCodeAt(offset + 2);
    return high * 0x100000000 + mid * 0x10000 + low;
  }

  /**
   * Encode a path, a version and a store id as one eight-character triplet.
   *
   * @param path - The path; its low 48 bits are stored.
   * @param version - The version; its low 48 bits are stored.
   * @param store - The store id; its low 32 bits are stored.
   * @returns The triplet string.
   */
  export function createTriplet(path: number, version: number, store: number): string {
    return String.fromCharCode(
      ...splitThreeWords(path),
      ...splitThreeWords(version),
      ...splitTwoWords(store)
    );
  }

  /**
   * Count the complete triplets contained in an id.
   *
   * @param id - The id string.
   * @returns The id length divided by eight, rounded down.
   */
  export function idTripletCount(id: string): number {
    return Math.floor(id.length / 8);
  }

  /**
   * Read the path of the `i`-th triplet of an id.
   *
   * @param id - The id string.
   * @param i - Zero-based triplet index.
   * @returns The decoded path.
   */
  export function idPathAt(id: string, i: number): number {
    return joinThreeWords(id, i << 3);
  }

  /**
   * Read the version of the `i`-th triplet of an id.
   *
   * @param id - The id string.
   * @param i - Zero-based triplet index.
   * @returns The decoded version.
   */
  export function idVersionAt(id: string, i: number): number {
    return joinThreeWords(id, (i << 3) + 3);
  }

  /**
   * Read the store id of the `i`-th triplet of an id.
   *
   * @param id - The id string.
   * @param i - Zero-based triplet index.
   * @returns The decoded store id.
   */
  export function idStoreAt(id: string, i: number): number {
    const base = (i << 3) + 6;
    return id.charCodeAt(base) * 0x10000 + id.charCodeAt(base + 1);
  }

  /**
   * Pick a random path at or above `min`.
   *
   * The random offset is scaled by the square root of the range, so results
   * stay close to `min`.
   *
   * @param min - Lower bound of the range.
   * @param max - Upper bound of the range.
   * @returns `min` plus a rounded random offset.
   */
  export function randomPath(min: number, max: number): number {
    const spread = Math.sqrt(max - min);
    const offset = Math.round(Math.random() * spread);
    return min + offset;
  }

  /**
   * Produce `n` paths evenly spaced strictly between `min` and `max`.
   *
   * The range is cut into `n + 1` equal steps and each interior step boundary
   * is rounded down. The intermediate values and the resulting paths are
   * logged to the console.
   *
   * @param n - Number of paths to produce.
   * @param min - Lower bound of the range.
   * @param max - Upper bound of the range.
   * @returns The generated paths.
   */
  export function generatePaths(n: number, min: number, max: number): number[] {
    const span = max - min;
    const slots = n + 1;
    const step = span / slots;
    console.log(span, slots, span / slots, step);
    const paths = Array.from({ length: n }, (_, k) => Math.floor(min + (k + 1) * step));
    console.log(paths);
    return paths;
  }
}

In [5]:
Private.generatePaths(1, 0, 11)

11 2 5.5 5.5


[ 5 ]

In [6]:
/**
 * Encode a version and a store id as a five-character duplex.
 *
 * The version occupies three 16-bit code units and the store id two, each
 * written most significant word first.
 *
 * @param version - The version; its low 48 bits are stored.
 * @param store - The store id; its low 32 bits are stored.
 * @returns The duplex string.
 */
export function createDuplexId(version: number, store: number): string {
  const WORD = 0xFFFF;

  // Peel the version apart from the least significant word upwards.
  const versionLow = version & WORD;
  const versionMid = (((version - versionLow) / 0x10000) | 0) & WORD;
  const versionHigh = (((version - versionMid - versionLow) / 0x100000000) | 0) & WORD;

  // The store id only needs two words.
  const storeLow = store & WORD;
  const storeHigh = (((store - storeLow) / 0x10000) | 0) & WORD;

  const codeUnits = [versionHigh, versionMid, versionLow, storeHigh, storeLow];
  return String.fromCharCode(...codeUnits);
}

[Function: createDuplexId]

In [34]:
/**
 * Create `n` triplex ids that sort between two existing ids.
 *
 * The bounds are walked triplet by triplet. Triplets on which both bounds
 * agree are copied into a shared prefix. At the first position where the gap
 * between the lower and upper path can hold `n` new paths, `n` evenly spaced
 * paths are generated there. If no position has enough room, the whole lower
 * bound becomes the prefix and the new paths go into an extra trailing
 * triplet.
 *
 * @param n - Number of ids to create. Non-positive values yield `[]`.
 * @param version - Version stored in the final triplet of every new id.
 * @param store - Store id stored in the final triplet of every new id.
 * @param lower - The id the new ids must follow, or `''` for no lower bound.
 * @param upper - The id the new ids must precede, or `''` for no upper bound.
 * @returns The new ids, in the order their paths were generated.
 */
export
function createTriplexIds(n: number, version: number, store: number, lower: string = '', upper: string = ''): string[] {
  const MAX_PATH = 0xFFFFFFFFFFFF;
  const ids: string[] = [];

  if (!(n > 0)) {
    return ids;
  }

  const lowerCount = lower ? Private.idTripletCount(lower) : 0;
  let upperCount = upper ? Private.idTripletCount(upper) : 0;

  // With no bounds at all there is still one (virtual) triplet position to
  // inspect, spanning the whole path range.
  const count = lowerCount + upperCount === 0 ? 1 : Math.max(lowerCount, upperCount);

  let prefix = '';

  for (let i = 0; i < count; ++i) {
    // Past the end of `lower` the triplet is treated as all zeros.
    const hasLower = i < lowerCount;
    const lp = hasLower ? Private.idPathAt(lower, i) : 0;
    const lc = hasLower ? Private.idVersionAt(lower, i) : 0;
    const ls = hasLower ? Private.idStoreAt(lower, i) : 0;

    // Without an upper bound the path is only limited by MAX_PATH.
    const hasUpper = i < upperCount;
    const up = hasUpper ? Private.idPathAt(upper, i) : (upperCount === 0 ? MAX_PATH + 1 : 0);
    const uc = hasUpper ? Private.idVersionAt(upper, i) : 0;
    const us = hasUpper ? Private.idStoreAt(upper, i) : 0;

    // Both bounds agree here: the triplet is part of the common prefix.
    if (lp === up && lc === uc && ls === us) {
      prefix += Private.createTriplet(lp, lc, ls);
      continue;
    }

    // Enough free paths strictly between the bounds: place every id here.
    if ((up - lp - 1) >= n) {
      for (const path of Private.generatePaths(n, lp, up)) {
        ids.push(prefix + Private.createTriplet(path, version, store));
      }
      return ids;
    }

    // Not enough room: follow the lower bound. Once the prefix has diverged
    // from `upper`, the upper bound no longer constrains later triplets.
    prefix += Private.createTriplet(lp, lc, ls);
    upperCount = 0;
  }

  // No position had room, so extend the prefix by one more triplet and give
  // each id its own path there, which keeps the ids distinct.
  for (const path of Private.generatePaths(n, 1, MAX_PATH)) {
    ids.push(prefix + Private.createTriplet(path, version, store));
  }

  return ids;
}

[Function: createTriplexIds]

In [99]:
// Build two single-triplet ids (path, version, store) to serve as the
// lower and upper bounds in the cells that follow.
lower = Private.createTriplet(0,1,1)
upper = Private.createTriplet(2,2,1)

'\u0000\u0000\u0002\u0000\u0000\u0002\u0000\u0001'

In [100]:
lower

'\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0001'

In [101]:
upper

'\u0000\u0000\u0002\u0000\u0000\u0002\u0000\u0001'

In [102]:
createTriplexIds(1, 3, 1, lower, upper)

2 2 1 1


[ '\u0000\u0000\u0001\u0000\u0000\u0003\u0000\u0001' ]

In [None]:
Private.createTriplet()

In [25]:
value = ''
remove = 0
index = 0
text = 'abc'

'abc'

In [26]:
index = Math.min(index, value.length)

0

In [27]:
count = Math.min(remove, value.length - index)

0

In [28]:
metadata_ids = []

[]

In [29]:
lower = index === 0 ? '' : metadata_ids[index - 1];
upper = index === value.length ? '' : metadata_ids[index];

''

In [30]:
lower

''

In [31]:
upper

''

In [35]:
createTriplexIds(text.length, 1, 1, lower, upper)

281474976710656 4 70368744177664 70368744177664
[ 70368744177664, 140737488355328, 211106232532992 ]


[
  '䀀\u0000\u0000\u0000\u0000\u0001\u0000\u0001',
  '耀\u0000\u0000\u0000\u0000\u0001\u0000\u0001',
  '쀀\u0000\u0000\u0000\u0000\u0001\u0000\u0001'
]

In [21]:
lowerCount = lower ? Private.idTripletCount(lower) : 0;
upperCount = upper ? Private.idTripletCount(upper) : 0;

0

In [22]:
lowerCount

0

In [23]:
upperCount

0

## Base64 encoding

In [None]:
/**
 * Encode a string as base64.
 *
 * The string is converted to bytes with `Buffer.from` using its default
 * string encoding before being rendered as base64.
 * NOTE(review): ids built from arbitrary 16-bit code units may contain lone
 * surrogates; confirm that these survive the default encoding.
 *
 * @param input - The text to encode.
 * @returns The base64 representation of `input`.
 */
export function encodeBase64(input: string): string {
    return Buffer.from(input).toString('base64');
}

In [None]:
/**
 * Decode a base64 string back into text.
 *
 * @param input - Base64 text, as produced by `encodeBase64`.
 * @returns The decoded bytes rendered with `Buffer`'s default string encoding.
 */
export function decodeBase64(input: string): string {
    const bytes = Buffer.from(input, 'base64');
    return bytes.toString();
}

## Regular expression (Ian's PR)

In [None]:
// Bounds of the UTF-16 high-surrogate (HS) and low-surrogate (LS) ranges.
const HS_L = '\uD800';
const HS_U = '\uDBFF';
const LS_L = '\uDC00';
const LS_U = '\uDFFF';

// Character classes shared by the patterns below.
const HS_CLASS = `[${HS_L}-${HS_U}]`;
const LS_CLASS = `[${LS_L}-${LS_U}]`;

// Any low surrogate code unit.
const LS_REGEX = new RegExp(`(${LS_CLASS})`, 'g');
// A high surrogate that is not immediately followed by a low surrogate.
const UNPAIRED_HS_REGEX = new RegExp(`(${HS_CLASS})(?!${LS_CLASS})`, 'g');
// A low surrogate wrapped by generateIdString: 'X', HS_L, then the surrogate.
const PAIRED_LS_REGEX = new RegExp(`X${HS_L}(${LS_CLASS})`, 'g');
// A high surrogate wrapped by generateIdString: the surrogate, LS_L, then 'X'.
const PAIRED_HS_REGEX = new RegExp(`(${HS_CLASS})${LS_L}X`, 'g');

/**
 * Undo the padding added by `generateIdString`.
 *
 * Padded low surrogates are unwrapped first, then padded high surrogates.
 *
 * @param id - A string produced by `generateIdString`.
 * @returns The string with the padding sequences removed.
 */
export function stripSurrogates(id: string): string {
    const withoutLowPadding = id.replace(PAIRED_LS_REGEX, '$1');
    return withoutLowPadding.replace(PAIRED_HS_REGEX, '$1');
}

/**
 * Pad every surrogate code unit so that it is part of a surrogate pair.
 *
 * Each low surrogate is prefixed with 'X' and HS_L; afterwards each high
 * surrogate that is not followed by a low surrogate is suffixed with LS_L
 * and 'X'.
 *
 * NOTE(review): the encoding is not unique. 'X\uD800' and '\uDC00X' both
 * become 'X\uD800\uDC00X', so `stripSurrogates` cannot restore every input.
 *
 * @param str - The raw id string.
 * @returns The padded string.
 */
export function generateIdString(str: string): string {
    return str
        .replace(LS_REGEX, `X${HS_L}$1`)
        .replace(UNPAIRED_HS_REGEX, `$1${LS_L}X`);
}

/**
 * List the UTF-16 code units of a string.
 *
 * @param s - The string to inspect.
 * @returns One number per code unit, in order; `[]` for the empty string.
 */
export
function stringToCharCodes(s: string): number[] {
    // Both the accumulator and the counter are local, so repeated or nested
    // calls cannot interfere through shared global state.
    const result: number[] = [];
    for (let i = 0; i < s.length; i++) {
        result.push(s.charCodeAt(i));
    }
    return result;
}

## Testing

In [None]:
ids = createTriplexIds(2**10,1,1)

In [None]:
// For every id, record the mean time timeit reports over 100 calls of
// encodeBase64 on that id.
timesA = ids.map(item => {
    return timeit(100, encodeBase64, [item])
})

In [None]:
meanA = d3array.mean(timesA)

In [None]:
// For every id, record the mean time timeit reports over 100 calls of
// generateIdString on that id.
timesB = ids.map(item => {
    return timeit(100, generateIdString, [item])
})

In [None]:
meanB = d3array.mean(timesB)

In [None]:
meanB/meanA

In [None]:
patchIds = createTriplexIds(2**13, 1, 1, ids[0], ids[1])

## Scratch

In [6]:
10-2

8

In [8]:
Math.floor(8/3)

2

In [9]:
2+ 2

4

In [10]:
2+2+2

6

In [None]:
2+2+@

SyntaxError: unknown: Support for the experimental syntax 'decorators-legacy' isn't currently enabled (1:5):

[0m[31m[1m>[22m[39m[90m 1 | [39m[35m2[39m[33m+[39m[35m2[39m[33m+[39m[33m@[39m[0m
[0m [90m   | [39m    [31m[1m^[22m[39m[0m