Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(lib): Replace shuffling lib with own implementation #174

Merged
merged 1 commit into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

A library that aims to completely remove the overhead of creating tensors from CSV files, allowing you to dive right into the fun parts of your ML project.

- Lightweight.
- [Lightweight](https://bundlephobia.com/result?p=tensorflow-load-csv).
- Fast.
- Flexible.
- TypeScript compatible.
Expand All @@ -21,18 +21,21 @@ You can find the docs [here](https://barissencan.com/tensorflow-load-csv/).
## Installation

NPM:

```sh
npm install tensorflow-load-csv
```

Yarn:

```sh
yarn add tensorflow-load-csv
```

## Usage

Simple usage:

```js
import loadCsv from 'tensorflow-load-csv';

Expand All @@ -46,6 +49,7 @@ labels.print();
```

Advanced usage:

```js
import loadCsv from 'tensorflow-load-csv';

Expand Down
25 changes: 4 additions & 21 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,5 @@
},
"peerDependencies": {
"@tensorflow/tfjs": "^2.0.1"
},
"dependencies": {
"shuffle-seed": "^1.1.6"
}
}
2 changes: 1 addition & 1 deletion src/loadCsv.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import fs from 'fs';

import * as tf from '@tensorflow/tfjs';
import { shuffle } from 'shuffle-seed';

import { CsvReadOptions, CsvTable } from './loadCsv.models';
import filterColumns from './filterColumns';
import splitTestData from './splitTestData';
import applyMappings from './applyMappings';
import shuffle from './shuffle';

const defaultShuffleSeed = 'mncv9340ur';

Expand Down
51 changes: 51 additions & 0 deletions src/shuffle.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
 * Creates a seeded pseudo-random number generator (mulberry32).
 *
 * Each call to the returned function advances the internal state by a fixed
 * increment, scrambles it with integer multiply/xor-shift steps and returns
 * the resulting unsigned 32-bit value divided by 2^32, i.e. a number in
 * the range [0, 1).
 *
 * @param a - Seed the sequence starts from; the same seed yields the same sequence.
 * @returns A zero-argument function producing the next number of the sequence.
 */
const mulberry32 = (a: number) => {
  let state = a;

  return () => {
    state += 0x6d2b79f5;

    let mixed = state;
    mixed = Math.imul(mixed ^ (mixed >>> 15), mixed | 1);
    mixed ^= mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61);

    // `>>> 0` reinterprets the result as unsigned before scaling into [0, 1).
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
};

/**
 * Hashes a string into a non-negative integer below 2^53 (cyrb53).
 *
 * Two 32-bit accumulators are fed every UTF-16 code unit of `str`, mixed
 * against each other, and then combined: the low 21 bits of the second
 * accumulator become the high part of the result and the first accumulator
 * (as an unsigned 32-bit value) becomes the low part.
 *
 * @param str - Text to hash.
 * @param seed - Optional value xor-ed into both accumulators to vary the hash.
 * @returns The hash as a number that fits into 53 bits.
 */
const cyrb53 = (str: string, seed = 0) => {
  let low = 0xdeadbeef ^ seed;
  let high = 0x41c6ce57 ^ seed;

  let index = 0;
  while (index < str.length) {
    const unit = str.charCodeAt(index);
    low = Math.imul(low ^ unit, 2654435761);
    high = Math.imul(high ^ unit, 1597334677);
    index += 1;
  }

  // Order matters: the second mix consumes the already-mixed first accumulator.
  const lowMixed =
    Math.imul(low ^ (low >>> 16), 2246822507) ^
    Math.imul(high ^ (high >>> 13), 3266489909);
  const highMixed =
    Math.imul(high ^ (high >>> 16), 2246822507) ^
    Math.imul(lowMixed ^ (lowMixed >>> 13), 3266489909);

  const upperBits = 2097151 & highMixed;
  const lowerBits = lowMixed >>> 0;
  return 4294967296 * upperBits + lowerBits;
};

/**
 * Returns a shuffled copy of `array` using a seeded Fisher–Yates shuffle.
 *
 * The input array is never modified. The shuffle is fully deterministic:
 * the same input length and seed always produce the same permutation, and
 * omitting the seed is equivalent to passing `0`.
 *
 * @param array - Items to shuffle; left untouched.
 * @param seed - Number used directly as the generator seed, or a string that
 *   is first hashed into a number.
 * @returns A new array containing the items of `array` in shuffled order.
 */
function shuffle<T>(array: readonly T[], seed: number | string = 0): T[] {
  const numericSeed = typeof seed === 'string' ? cyrb53(seed) : seed;
  const random = mulberry32(numericSeed);

  // Work on a copy so the caller's array keeps its order.
  const output: T[] = Array.from(array);

  let m = output.length;

  while (m > 0) {
    // Pick from the not-yet-placed prefix [0, m), then shrink that prefix
    // and move the picked item into the slot that was just freed.
    const i = Math.floor(random() * m);
    m -= 1;

    const t = output[m];
    output[m] = output[i];
    output[i] = t;
  }

  return output;
}

export default shuffle;
12 changes: 6 additions & 6 deletions tests/loadCsv.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@ test('Shuffling should work and preserve feature - label pairs', () => {
// @ts-ignore
expect(features.arraySync()).toBeDeepCloseTo(
[
[102, -164],
[5, 40.34],
[0.234, 1.47],
[-93.2, 103.34],
[102, -164],
],
3
);
expect(labels.arraySync()).toMatchObject([
['Landotzka'],
['Landistan'],
['SomeCountria'],
['SomeOtherCountria'],
['Landotzka'],
]);
});

Expand All @@ -61,18 +61,18 @@ test('Shuffling with a custom seed should work', () => {
// @ts-ignore
expect(features.arraySync()).toBeDeepCloseTo(
[
[5, 40.34],
[-93.2, 103.34],
[102, -164],
[5, 40.34],
[0.234, 1.47],
[-93.2, 103.34],
],
3
);
expect(labels.arraySync()).toMatchObject([
['Landistan'],
['SomeOtherCountria'],
['Landotzka'],
['Landistan'],
['SomeCountria'],
['SomeOtherCountria'],
]);
});

Expand Down
24 changes: 24 additions & 0 deletions tests/shuffle.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import shuffle from '../src/shuffle';

// Shared fixture; the tests below rely on shuffle() leaving it untouched.
const data = [1, 2, 3, 4];

test('Shuffling without a seed should change order', () => {
  expect(shuffle(data)).toEqual([4, 3, 1, 2]);
});

test('Shuffling should not modify the original array', () => {
  const input = [...data];
  const shuffled = shuffle(input);

  expect(shuffled).not.toEqual(data);
  // The result must be a fresh array and the input must keep its order.
  expect(shuffled).not.toBe(input);
  expect(input).toEqual([1, 2, 3, 4]);
});

test('Shuffling with a number seed should change order', () => {
  expect(shuffle(data, 7)).toEqual([3, 2, 4, 1]);
});

test('Shuffling with a string seed should change order', () => {
  expect(shuffle(data, 'hello')).toEqual([2, 4, 3, 1]);
});

test('Shuffling with different seeds should produce different results', () => {
  const results = [shuffle(data, 7), shuffle(data, 'hello')];
  // Compare the two actual results; index 2 would be undefined and make
  // this assertion pass no matter what shuffle() returns.
  expect(results[0]).not.toEqual(results[1]);
});