Skip to content

Commit

Permalink
feat: expose onBrowserPage option
Browse files Browse the repository at this point in the history
  • Loading branch information
egoist committed Jul 31, 2020
1 parent b8a0467 commit 5d099c4
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 21 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,18 @@ module.exports = {

Now you should call `window.__my_snapshot__()` instead.

### Access Puppeteer browser page

Access the Puppeteer [`page`](https://pptr.dev/#?product=Puppeteer&version=v5.2.1&show=api-class-page) instance, for example, to expose functions from Node.js to the browser:

```js
module.exports = {
async onBrowserPage(page) {
await page.exposeFunction('md5', (content) => md5(content))
},
}
```

### Source directory

This is the same as running the CLI command `presite ./path/to/your/spa`:
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"polka": "^0.5.2",
"read-pkg-up": "^2.0.0",
"serve-static": "^1.14.1",
"taki": "2.2.2",
"taki": "2.3.0",
"update-notifier": "^4.1.0"
},
"devDependencies": {
Expand Down
40 changes: 24 additions & 16 deletions src/Crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import chalk from 'chalk'
import { PromiseQueue } from '@egoist/promise-queue'
import { Writer } from './Writer'
import { Logger } from './Logger'
import { Page } from 'puppeteer-core'

export const SPECIAL_EXTENSIONS_RE = /\.(xml|json)$/

Expand All @@ -14,16 +15,12 @@ const routeToFile = (route: string) => {
return route.replace(/\/?$/, '/index.html')
}

const getHref = (attrs: string) => {
const match = /href\s*=\s*(?:"(.*?)"|'(.*?)'|([^\s>]*))/.exec(attrs)
return match && (match[1] || match[2] || match[3])
}

type CrawlerOptions = {
hostname: string
port: number
options: {
routes: string[] | (() => Promise<string[]>)
onBrowserPage?: (page: Page) => void | Promise<void>
}
writer: Writer
logger: Logger
Expand All @@ -48,13 +45,31 @@ export class Crawler {
const queue = new PromiseQueue(
async (route: string) => {
const file = routeToFile(route)
let links: Set<string> | undefined
const html = await request({
url: `http://${hostname}:${port}${route}`,
onBeforeRequest(url) {
logger.log(`Crawling contents from ${chalk.cyan(url)}`)
},
async onBeforeClosingPage(page) {
links = new Set(
await page.evaluate(
({ hostname, port }: { hostname: string; port: string }) => {
return Array.from(document.querySelectorAll('a'))
.filter((a) => {
return a.hostname === hostname && a.port === port
})
.map((a) => a.pathname)
},
{ hostname, port: String(port) }
)
)
},
manually: SPECIAL_EXTENSIONS_RE.test(route) ? true : undefined,
onCreatedPage(page) {
async onCreatedPage(page) {
if (options.onBrowserPage) {
await options.onBrowserPage(page)
}
page.on('console', (e) => {
const type = e.type()
// @ts-ignore
Expand All @@ -68,16 +83,9 @@ export class Crawler {
},
})

// find all `<a>` tags in exported html files and export links that are not yet exported
let match: RegExpExecArray | null = null
const LINK_RE = /<a ([\s\S]+?)>/gm
while ((match = LINK_RE.exec(html))) {
const href = getHref(match[1])
if (href) {
const parsed = parseUrl(href)
if (!parsed.host && parsed.pathname) {
queue.add(parsed.pathname)
}
if (links && links.size > 0) {
for (const link of links) {
queue.add(link)
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { cac } from 'cac'
import chalk from 'chalk'
import update from 'update-notifier'
import JoyCon from 'joycon'
import { Page } from 'puppeteer-core'

const pkg: typeof import('../package.json') = require('../package')

Expand Down Expand Up @@ -36,6 +37,7 @@ async function main() {
baseDir?: string
outDir?: string
routes?: string[] | (() => Promise<string[]>)
onBrowserPage?: (page: Page) => void | Promise<void>
}

let config: Required<ConfigInput>
Expand Down Expand Up @@ -85,6 +87,7 @@ async function main() {
port: server.port!,
options: {
routes: config.routes,
onBrowserPage: config.onBrowserPage,
},
writer,
logger,
Expand Down
8 changes: 4 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3155,10 +3155,10 @@ supports-color@^7.1.0:
dependencies:
has-flag "^4.0.0"

taki@2.2.2:
version "2.2.2"
resolved "https://registry.yarnpkg.com/taki/-/taki-2.2.2.tgz#79f44c1a04efbd171e7881caa980f667a5486abe"
integrity sha512-lKaHTw5RLFWbB4rs7vVo9O5UsC9/DIl51ijYKzgXQ44nHqgDxqPMHdCb2GnIthfC99lzvNdFup+qxjISKXV5tA==
taki@2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/taki/-/taki-2.3.0.tgz#c5ad67af7e04eaaee8c11ba8736e51aed84fc2f5"
integrity sha512-CP4lDcqHWj2s03HrJUGF2jn+k/ABm4JUu2IZ8zLGyqPO/nhjxQVHBx11UaaNqg2gIoBIUSvEk5KgFE/AnCOAeQ==
dependencies:
debug "4.1.1"
html-minifier "4.0.0"
Expand Down

0 comments on commit 5d099c4

Please sign in to comment.