Skip to content

Commit

Permalink
feat(lib): Populate feature and labels in user provided order (#217)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: Previously, the column order of the CSV file was preserved.
Features and labels are now populated in the order provided by the user, which changes the output.
  • Loading branch information
isair committed Mar 19, 2021
1 parent 93c9296 commit de40f95
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 14 deletions.
28 changes: 23 additions & 5 deletions src/filterColumns.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
import { CsvTable } from './loadCsv.models';

/**
 * Filters and re-orders the columns of a CSV table.
 *
 * The first row of `table` is treated as the header row. The result contains
 * one column per distinct entry of `headers` that also appears in that header
 * row, laid out in the order given by `headers` (not the order in the CSV).
 *
 * - Entries of `headers` that do not match any CSV column are skipped, so
 *   returned rows are always dense and all have the same length.
 * - Repeated entries in `headers` yield a single column, placed at the
 *   position of the first occurrence.
 * - When the CSV repeats a column name, the last such column is used.
 * - Cells missing from a short row come back as `undefined`.
 * - An empty `table` yields an empty result.
 *
 * Where `n` is the number of non-header cells in `table`, `m` is the number of
 * header cells in `table`, and `h` is the number of items in `headers`:
 *
 * Time complexity: O(n + m + h)
 *
 * Space complexity: O(n + m + h)
 */
const filterColumns = (table: CsvTable, headers: string[]) => {
  const headerRow = table[0];
  if (headerRow === undefined) return [];

  // Column name -> index in the CSV header row; later duplicates overwrite
  // earlier ones, so the last column carrying a given name wins.
  const columnIndexByName = new Map<string, number>();
  for (let i = 0; i < headerRow.length; i++) {
    columnIndexByName.set(headerRow[i] as string, i);
  }

  // Source column index for every output column, in `headers` order.
  const sourceIndices: number[] = [];
  const emitted = new Set<string>();
  for (const header of headers) {
    const sourceIndex = columnIndexByName.get(header);
    // Skip unknown names and repeats so the output never contains holes.
    if (sourceIndex === undefined || emitted.has(header)) continue;
    emitted.add(header);
    sourceIndices.push(sourceIndex);
  }

  return table.map((row) =>
    sourceIndices.map((sourceIndex) => row[sourceIndex])
  );
};

export default filterColumns;
7 changes: 5 additions & 2 deletions src/loadCsv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,18 @@ const loadCsv = (
throw new Error('CSV file can not be shorter than two rows');
}

applyMappings(data, mappings, new Set(flatten));

const tables: { [key: string]: CsvTable } = {
labels: filterColumns(data, labelColumns),
features: filterColumns(data, featureColumns),
testFeatures: [],
testLabels: [],
};

const flattenSet = new Set(flatten);

applyMappings(tables.labels, mappings, flattenSet);
applyMappings(tables.features, mappings, flattenSet);

tables.labels.shift();
const featureColumnNames = tables.features.shift() as string[];

Expand Down
14 changes: 7 additions & 7 deletions tests/filterColumns.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@ import filterColumns from '../src/filterColumns';

const data = [
['lat', 'lng', 'country'],
['0.234', '1.47', 'SomeCountria'],
['-293.2', '103.34', 'SomeOtherCountria'],
['0', '1.47', 'SomeCountria'],
['1', '103.34', 'SomeOtherCountria'],
];

test('Filtering a single column works correctly', () => {
const result = filterColumns(data, ['lng']);
expect(result).toMatchObject([['lng'], ['1.47'], ['103.34']]);
});

test('Filtering multiple columns works correctly', () => {
const result = filterColumns(data, ['country', 'lng']); // Column order from the CSV should be preserved.
test('Filtering multiple columns works correctly, respects order in second argument, does not break with multiple same name columns', () => {
const result = filterColumns(data, ['country', 'lat']); // Columns should follow the order of the second argument, not the CSV column order.
expect(result).toMatchObject([
['lng', 'country'],
['1.47', 'SomeCountria'],
['103.34', 'SomeOtherCountria'],
['country', 'lat'],
['SomeCountria', '0'],
['SomeOtherCountria', '1'],
]);
});

0 comments on commit de40f95

Please sign in to comment.