fix(core): friendlier inspectHtmlTask logs
harlan-zw committed May 13, 2023
1 parent 2c753b6 commit f3b7949
Showing 1 changed file with 15 additions and 8 deletions.
packages/core/src/puppeteer/tasks/html.ts (23 changes: 15 additions & 8 deletions)
@@ -1,9 +1,10 @@
-import { join } from 'path'
+import { join } from 'node:path'
 import fs from 'fs-extra'
 import type { CheerioAPI } from 'cheerio'
 import cheerio from 'cheerio'
 import type { Page } from 'puppeteer-core'
 import { $URL, withoutTrailingSlash } from 'ufo'
+import chalk from 'chalk'
 import type { HTMLExtractPayload, PuppeteerTask } from '../../types'
 import { useUnlighthouse } from '../../unlighthouse'
 import { useLogger } from '../../logger'
@@ -77,7 +78,7 @@ export const extractHtmlPayload: (page: Page, route: string) => Promise<{ succes
   }
 }
 
-export const processSeoMeta = ($: CheerioAPI): HTMLExtractPayload => {
+export function processSeoMeta($: CheerioAPI): HTMLExtractPayload {
   return {
     alternativeLangDefault: $('link[hreflang="x-default"]').attr('href'),
     favicon: $('link[rel~="icon"]').attr('href') || '/favicon.ico',
@@ -97,22 +98,22 @@ export const inspectHtmlTask: PuppeteerTask = async (props) => {
   const logger = useLogger()
   let html: string
 
+  const start = new Date()
   // basic caching based on saving html payloads
   const htmlPayloadPath = join(routeReport.artifactPath, ReportArtifacts.html)
+  let cached = false
   if (resolvedConfig.cache && fs.existsSync(htmlPayloadPath)) {
     html = fs.readFileSync(htmlPayloadPath, { encoding: 'utf-8' })
     logger.debug(`Running \`inspectHtmlTask\` for \`${routeReport.route.path}\` using cache.`)
+    cached = true
   }
   else {
-    const start = new Date()
     const response = await extractHtmlPayload(page, routeReport.route.url)
-    const end = new Date()
-    const seconds = Math.round(end.getTime() - start.getTime())
-    logger.debug(`HTML extract of \`${routeReport.route.url}\` took \`${seconds}\`ms`)
+    logger.debug(`HTML extract of \`${routeReport.route.url}\` response ${response.success ? 'succeeded' : 'failed'}.`)
 
     if (!response.success || !response.payload) {
       routeReport.tasks.inspectHtmlTask = 'ignore'
-      logger.warn(`Failed to extract HTML payload from route \`${routeReport.route.path}\`: ${response.message}`)
+      logger.info(`Skipping ${routeReport.route.path}. ${response.message}`)
       return routeReport
     }
     if (response.redirected) {
@@ -173,7 +174,13 @@ export const inspectHtmlTask: PuppeteerTask = async (props) => {
   await hooks.callHook('discovered-internal-links', routeReport.route.path, internalLinks)
   routeReport.seo.internalLinks = internalLinks.length
   routeReport.seo.externalLinks = externalLinks.length
-  logger.success(`Completed \`inspectHtmlTask\` for \`${routeReport.route.path}\`. [Size: \`${formatBytes(html.length)}\`]`)
+  const end = new Date()
+  const ms = Math.round(end.getTime() - start.getTime())
+  // make ms human friendly
+  const seconds = (ms / 1000).toFixed(1)
+  if (!cached)
+    logger.success(`Completed \`inspectHtmlTask\` for \`${routeReport.route.path}\`. ${chalk.gray(`(${formatBytes(html.length)} took ${seconds}s)`)}`)
+
   // only need the html payload for caching purposes, unlike the lighthouse reports
   if (resolvedConfig.cache)
     fs.writeFileSync(htmlPayloadPath, html)
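
For reference, the friendlier success log folds the payload size and the elapsed time into a single dimmed suffix. The sketch below is a minimal, standalone illustration of that formatting pattern, not the project's actual code: formatBytes is a simplified stand-in for the helper used in the codebase, console.log stands in for the logger, and the route path and HTML are hypothetical.

import chalk from 'chalk'

// simplified stand-in for the formatBytes helper referenced in the diff
function formatBytes(bytes: number): string {
  return bytes < 1024 ? `${bytes} B` : `${(bytes / 1024).toFixed(1)} kB`
}

function logCompletion(routePath: string, html: string, start: Date): void {
  const ms = Math.round(new Date().getTime() - start.getTime())
  // convert milliseconds to a human friendly seconds value with one decimal place
  const seconds = (ms / 1000).toFixed(1)
  console.log(`Completed \`inspectHtmlTask\` for \`${routePath}\`. ${chalk.gray(`(${formatBytes(html.length)} took ${seconds}s)`)}`)
}

// hypothetical usage
logCompletion('/about', '<html><body>example page</body></html>', new Date())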