-
Notifications
You must be signed in to change notification settings - Fork 2
/
csv2ndjson.mjs
58 lines (51 loc) · 1.73 KB
/
csv2ndjson.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/* global process */
import { default as Papa } from "papaparse";
import { createReadStream, createWriteStream, existsSync } from "node:fs";
/** Columns whose cells hold a single-quoted, JSON-like list that must be decoded. */
const LIST_FIELDS = [
  "production_companies",
  "production_countries",
  "genres",
  "spoken_languages",
];

/**
 * Builds the output record for one parsed CSV row: a shallow copy of the row
 * with every column in LIST_FIELDS replaced by its decoded JSON value.
 *
 * The decoding simply swaps every single quote for a double quote before
 * calling JSON.parse, so a cell containing an apostrophe inside a value
 * (or a missing column) makes this function throw.
 *
 * @param {Record<string, string>} row - One row as produced by the CSV parser.
 * @returns {Record<string, unknown>} The row with its list columns decoded.
 * @throws {SyntaxError|TypeError} When a list column is missing or not decodable.
 */
function toRecord(row) {
  const record = { ...row };
  for (const field of LIST_FIELDS) {
    record[field] = JSON.parse(row[field].replaceAll("'", '"'));
  }
  return record;
}

/**
 * Converts a CSV file into newline-delimited JSON (one object per line).
 *
 * Rows whose list columns cannot be decoded are skipped with a warning on
 * stderr; they do not abort the conversion.
 *
 * @param {object} options
 * @param {string} options.inputPath - Path of the CSV file to read.
 * @param {string} options.outputPath - Path of the NDJSON file to write.
 * @returns {Promise<void>} Resolves once the output stream has finished
 *   (all rows handed to the file system); rejects on a parse or write error.
 * @throws {Error} When `inputPath` is missing / does not exist or `outputPath` is missing.
 */
export async function convert({
  inputPath,
  outputPath,
}) {
  // Quick (leaky) check that files exist: the file can still vanish before it is opened.
  if (!inputPath || !existsSync(inputPath)) {
    throw new Error(`Invalid or missing input path [${inputPath}] (did you specify INPUT_CSV_PATH ?)`);
  }
  if (!outputPath) {
    throw new Error(`Invalid output path [${outputPath}] (did you specify OUTPUT_NDJSON_PATH ?)`);
  }

  // Open the output file, then the input file
  const outputStream = createWriteStream(outputPath);
  const inputStream = createReadStream(inputPath);

  // Parse the file; the promise settles only when the output is finished or something failed
  await new Promise((resolve, reject) => {
    // Resolve on "finish" rather than on parse completion so callers never
    // observe a partially written file.
    outputStream.once("finish", resolve);
    // Kept attached (not `once`) so a late write error can never become an
    // unhandled "error" event; extra reject() calls are no-ops.
    outputStream.on("error", (err) => {
      inputStream.destroy();
      reject(err);
    });

    Papa.parse(
      inputStream,
      {
        header: true,
        delimiter: ",",
        skipEmptyLines: true,
        error: (err) => {
          // Nothing more will be written; release the file handle.
          outputStream.destroy();
          reject(err);
        },
        complete: () => outputStream.end(),
        chunk: (results) => {
          for (const row of results.data) {
            try {
              outputStream.write(`${JSON.stringify(toRecord(row))}\n`);
            } catch (err) {
              // Best effort: report the bad row together with the reason and carry on.
              process.stderr.write(`[warn] failed to write row with ID [${row.id}]: [${row.title}] (${err.message})\n`);
            }
          }
        },
      },
    );
  });
}
// Convert the input to the output, using the paths given in the environment.
// A failure is reported on stderr and reflected in the exit code instead of
// being left as an unhandled promise rejection.
convert({
  inputPath: process.env.INPUT_CSV_PATH,
  outputPath: process.env.OUTPUT_NDJSON_PATH,
}).catch((err) => {
  process.stderr.write(`[error] ${err?.message ?? err}\n`);
  process.exitCode = 1;
});