Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add outlines #223

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ lib/
coverage
*.tgz
examples/
*.pdf
*.pdf
!pdf/*.pdf
Binary file removed docs-to-pdf.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
"commander": "^11.0.0",
"console-stamp": "^3.1.1",
"express": "^4.18.2",
"html-entities": "^2.4.0",
"pdf-lib": "^1.17.1",
"puppeteer": "^21.0.2",
"puppeteer-autoscroll-down": "^1.1.2",
"puppeteer-core": "^21.0.2"
Expand Down
2 changes: 1 addition & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env node

import * as command from './command';
import * as command from './command/command';

const program = command.makeProgram();

Expand Down
6 changes: 3 additions & 3 deletions src/command.ts → src/command/command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ import {
commaSeparatedList,
generatePuppeteerPDFMargin,
} from './commander-options';
import { generatePDF, GeneratePDFOptions } from './core';
import { generatePDF, GeneratePDFOptions } from '../core';
import {
generateDocusaurusPDF,
DocusaurusOptions,
} from './provider/docusaurus';
} from '../provider/docusaurus';
import chalk from 'chalk';
import console_stamp from 'console-stamp';
const version = require('../package.json').version;
const version = require('../../package.json').version;

console_stamp(console);

Expand Down
File renamed without changes.
104 changes: 35 additions & 69 deletions src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,17 @@ import { scrollPageToBottom } from 'puppeteer-autoscroll-down';
import * as fs from 'fs-extra';
import { chromeExecPath } from './browser';
import * as utils from './utils';
import { PDF, PDFOptions } from './pdf/generate';

console_stamp(console);

let contentHTML = '';
export interface GeneratePDFOptions {

export interface GeneratePDFOptions extends PDFOptions {
initialDocURLs: Array<string>;
excludeURLs: Array<string>;
outputPDFFilename: string;
pdfMargin: puppeteer.PDFOptions['margin'];
contentSelector: string;
paginationSelector: string;
// deprecated - use paperFormat
pdfFormat?: puppeteer.PaperFormat;
paperFormat: puppeteer.PaperFormat;
excludeSelectors: Array<string>;
cssStyle: string;
puppeteerArgs: Array<string>;
Expand All @@ -27,8 +24,6 @@ export interface GeneratePDFOptions {
disableTOC: boolean;
coverSub: string;
waitForRender: number;
headerTemplate: string;
footerTemplate: string;
protocolTimeout: number;
filterKeyword: string;
baseUrl: string;
Expand All @@ -38,38 +33,14 @@ export interface GeneratePDFOptions {
}

/* c8 ignore start */
export async function generatePDF({
initialDocURLs,
excludeURLs,
outputPDFFilename = 'docs-to-pdf.pdf',
pdfMargin = { top: 32, right: 32, bottom: 32, left: 32 },
contentSelector,
paginationSelector,
paperFormat,
excludeSelectors,
cssStyle,
puppeteerArgs,
coverTitle,
coverImage,
disableTOC,
coverSub,
waitForRender,
headerTemplate,
footerTemplate,
protocolTimeout,
filterKeyword,
baseUrl,
excludePaths,
restrictPaths,
openDetail = true,
}: GeneratePDFOptions): Promise<void> {
export async function generatePDF(options: GeneratePDFOptions): Promise<void> {
const execPath = process.env.PUPPETEER_EXECUTABLE_PATH ?? chromeExecPath();
console.debug(chalk.cyan(`Using Chromium from ${execPath}`));
const browser = await puppeteer.launch({
headless: 'new',
executablePath: execPath,
args: puppeteerArgs,
protocolTimeout: protocolTimeout,
args: options.puppeteerArgs,
protocolTimeout: options.protocolTimeout,
});

const chromeTmpDataDir = browser
Expand All @@ -89,8 +60,8 @@ export async function generatePDF({
} else request.continue();
});

console.debug(`InitialDocURLs: ${initialDocURLs}`);
for (const url of initialDocURLs) {
console.debug(`InitialDocURLs: ${options.initialDocURLs}`);
for (const url of options.initialDocURLs) {
let nextPageURL = url;
const urlPath = new URL(url).pathname;

Expand All @@ -103,52 +74,55 @@ export async function generatePDF({
waitUntil: 'networkidle0',
timeout: 0,
});
if (waitForRender) {
if (options.waitForRender) {
console.log(chalk.green('Waiting for render...'));
await new Promise((r) => setTimeout(r, waitForRender));
await new Promise((r) => setTimeout(r, options.waitForRender));
}

if (
await utils.isPageKept(
page,
nextPageURL,
urlPath,
excludeURLs,
filterKeyword,
excludePaths,
restrictPaths,
options.excludeURLs,
options.filterKeyword,
options.excludePaths,
options.restrictPaths,
)
) {
// Open all <details> elements on the page
if (openDetail) {
if (options.openDetail) {
await utils.openDetails(page);
}
// Get the HTML string of the content section.
contentHTML += await utils.getHtmlContent(page, contentSelector);
contentHTML += await utils.getHtmlContent(
page,
options.contentSelector,
);
console.log(chalk.green('Success'));
}

// Find next page url before DOM operations
nextPageURL = await utils.findNextUrl(page, paginationSelector);
nextPageURL = await utils.findNextUrl(page, options.paginationSelector);
}
}

console.log(chalk.cyan('Start generating PDF...'));

// Generate cover Image if declared
let coverImageHtml = '';
if (coverImage) {
if (options.coverImage) {
console.log(chalk.cyan('Get coverImage...'));
const image = await utils.getCoverImage(page, coverImage);
const image = await utils.getCoverImage(page, options.coverImage);
coverImageHtml = utils.generateImageHtml(image.base64, image.type);
}

// Generate Cover
console.log(chalk.cyan('Generate cover...'));
const coverHTML = utils.generateCoverHtml(
coverTitle,
options.coverTitle,
coverImageHtml,
coverSub,
options.coverSub,
);

// Generate Toc
Expand All @@ -158,27 +132,29 @@ export async function generatePDF({
console.log(chalk.cyan('Restructuring the html of a document...'));

// Go to initial page
await page.goto(`${initialDocURLs[0]}`, { waitUntil: 'networkidle0' });
await page.goto(`${options.initialDocURLs[0]}`, {
waitUntil: 'networkidle0',
});

await page.evaluate(
utils.concatHtml,
coverHTML,
tocHTML,
modifiedContentHTML,
disableTOC,
baseUrl,
options.disableTOC,
options.baseUrl,
);

// Remove unnecessary HTML by using excludeSelectors
if (excludeSelectors) {
if (options.excludeSelectors) {
console.log(chalk.cyan('Remove unnecessary HTML...'));
await utils.removeExcludeSelector(page, excludeSelectors);
await utils.removeExcludeSelector(page, options.excludeSelectors);
}

// Add CSS to HTML
if (cssStyle) {
if (options.cssStyle) {
console.log(chalk.cyan('Add CSS to HTML...'));
await page.addStyleTag({ content: cssStyle });
await page.addStyleTag({ content: options.cssStyle });
}

// Scroll to the bottom of the page with puppeteer-autoscroll-down
Expand All @@ -187,19 +163,9 @@ export async function generatePDF({
await scrollPageToBottom(page, {}); //cast to puppeteer-core type

// Generate PDF
console.log(chalk.cyan('Generate PDF...'));
await page.pdf({
path: outputPDFFilename,
format: paperFormat,
printBackground: true,
margin: pdfMargin,
displayHeaderFooter: !!(headerTemplate || footerTemplate),
headerTemplate,
footerTemplate,
timeout: 0,
});
const pdf = new PDF(options);
await pdf.generate(page);

console.log(chalk.green(`PDF generated at ${outputPDFFilename}`));
await browser.close();
console.log(chalk.green('Browser closed'));

Expand Down
71 changes: 71 additions & 0 deletions src/pdf/generate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import * as puppeteer from 'puppeteer-core';
import chalk from 'chalk';
import { getOutline, setOutline } from './outline';
import { PDFDocument } from 'pdf-lib';
import { writeFileSync } from 'fs';

/**
 * Options read by the `PDF` class when exporting a Puppeteer page to a file.
 */
export interface PDFOptions {
// Destination path of the generated PDF; `PDF.generate` falls back to
// 'output.pdf' when this is nullish at runtime.
outputPDFFilename: string;
// Paper size handed to Puppeteer as the `format` export option.
paperFormat: puppeteer.PaperFormat;
// Not read by `PDF.generate` in this file.
// NOTE(review): appears to be the deprecated predecessor of `paperFormat` — confirm before removing.
pdfFormat?: puppeteer.PaperFormat;
// Page margins handed to Puppeteer; `PDF.generate` substitutes 32 on every
// side when this is nullish.
pdfMargin: puppeteer.PDFOptions['margin'];
// Header template passed through to Puppeteer. Header/footer display is
// enabled when either template is truthy.
headerTemplate: string;
// Footer template passed through to Puppeteer (see headerTemplate).
footerTemplate: string;
}

/**
 * Exports a Puppeteer page to a PDF file and attaches an outline to it.
 *
 * The page is first printed with Puppeteer, then the resulting bytes are
 * re-opened with pdf-lib so that `setOutline` can be applied before the final
 * file is written to disk.
 */
export class PDF {
  private readonly options: PDFOptions;

  /**
   * @param options - export settings; stored as-is and read on every
   *   `generate` call.
   */
  constructor(options: PDFOptions) {
    this.options = options;
  }

  /**
   * Print `page` to PDF, apply the outline collected from its headings and
   * write the result to `options.outputPDFFilename` (or 'output.pdf' when
   * that is nullish).
   *
   * @param page - the Puppeteer page to print; it is also passed to
   *   `getOutline` together with the selectors `h1`..`h6`.
   * @returns a promise that resolves once the final file has been written.
   * @throws {Error} if `page.pdf()` fails. The original error is logged and
   *   rethrown unchanged (non-Error rejections are wrapped in an Error).
   *   Failures from pdf-lib or from writing the file are not caught here and
   *   propagate to the caller.
   */
  public async generate(page: puppeteer.Page): Promise<void> {
    console.log(chalk.cyan('Generate PDF...'));
    const outline = await getOutline(page, [
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
    ]);
    console.log(chalk.green('Outline generated'));

    // Resolve the destination once so the Puppeteer export, the final write
    // and the log message can never disagree.
    const outputPath = this.options.outputPDFFilename ?? 'output.pdf';
    const { headerTemplate, footerTemplate } = this.options;

    const pdfExportOptions = {
      // Puppeteer also writes its own copy to this path; it is overwritten
      // below once the outline has been applied.
      path: outputPath,
      format: this.options.paperFormat,
      margin: this.options.pdfMargin ?? {
        top: 32,
        right: 32,
        bottom: 32,
        left: 32,
      },
      headerTemplate,
      footerTemplate,
      // Only render header/footer when at least one template was supplied.
      displayHeaderFooter: !!(headerTemplate || footerTemplate),
      printBackground: true,
      timeout: 0,
    };

    const pdf = await page.pdf(pdfExportOptions).catch((err: unknown) => {
      console.error(chalk.red(err));
      // Rethrow the original error: `new Error(err)` would stringify it
      // ("Error: Error: ...") and discard its stack trace and type.
      throw err instanceof Error ? err : new Error(String(err));
    });

    const pdfDoc = await PDFDocument.load(pdf);
    setOutline(pdfDoc, outline, true);
    const buffer = await pdfDoc.save();
    writeFileSync(outputPath, buffer);
    console.log(chalk.green(`PDF generated at ${outputPath}`));
  }
}

Check warning on line 71 in src/pdf/generate.ts

View check run for this annotation

Codecov / codecov/patch

src/pdf/generate.ts#L17-L71

Added lines #L17 - L71 were not covered by tests
Loading
Loading