From dfe9fb00fa9d1bc5dce740c49901a6398638b742 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Thu, 24 Sep 2020 12:02:59 +0200 Subject: [PATCH] fix(build): remove stale page-data files (#26937) * test(artifacts): assert (un)expected page-data/html files * fix(gatsby): delete stale page-data on builds * remove deepFilter option it could cause very weird edge cases if user actually have pages with `/sq/d/` prefix and we already check for `page-data.json` file (static query will have [hash].json names) * handle fs traversal errors (kind of) --- .../artifacts/__tests__/index.js | 335 ++++++++++++++++++ .../artifacts/__tests__/static-queries.js | 183 ---------- integration-tests/artifacts/gatsby-node.js | 24 ++ .../artifacts/src/templates/dummy.js | 5 + packages/gatsby/package.json | 1 + packages/gatsby/src/bootstrap/index.ts | 4 + packages/gatsby/src/utils/page-data.ts | 51 +++ yarn.lock | 2 +- 8 files changed, 421 insertions(+), 184 deletions(-) create mode 100644 integration-tests/artifacts/__tests__/index.js delete mode 100644 integration-tests/artifacts/__tests__/static-queries.js create mode 100644 integration-tests/artifacts/gatsby-node.js create mode 100644 integration-tests/artifacts/src/templates/dummy.js diff --git a/integration-tests/artifacts/__tests__/index.js b/integration-tests/artifacts/__tests__/index.js new file mode 100644 index 0000000000000..7dc2bfc03e6b5 --- /dev/null +++ b/integration-tests/artifacts/__tests__/index.js @@ -0,0 +1,335 @@ +const { spawn } = require(`child_process`) +const path = require(`path`) +const { murmurhash } = require(`babel-plugin-remove-graphql-queries`) +const { readPageData } = require(`gatsby/dist/utils/page-data`) +const { stripIgnoredCharacters } = require(`gatsby/graphql`) +const fs = require(`fs`) + +jest.setTimeout(100000) + +const publicDir = path.join(process.cwd(), `public`) + +const gatsbyBin = path.join(`node_modules`, `.bin`, `gatsby`) + +const titleQuery = ` + { + site { + siteMetadata { + title + } + } + } +` + +const authorQuery = ` + { + site { + siteMetadata { + author + } + } + } +` + +const githubQuery = ` + { + site { + siteMetadata { + github + } + } + } +` + +const moreInfoQuery = ` + { + site { + siteMetadata { + moreInfo + } + } + } +` + +function hashQuery(query) { + const text = stripIgnoredCharacters(query) + const hash = murmurhash(text, `abc`) + return String(hash) +} + +const globalQueries = [githubQuery, moreInfoQuery] + +const pagePathToFilePath = { + html: pagePath => path.join(`public`, pagePath, `index.html`), + "page-data": pagePath => + path.join( + `public`, + `page-data`, + pagePath === `/` ? `index` : pagePath, + `page-data.json` + ), +} + +function assertFileExistenceForPagePaths({ pagePaths, type, shouldExist }) { + if (![`html`, `page-data`].includes(type)) { + throw new Error(`Unexpected type`) + } + + test.each(pagePaths)( + `${type} file for "%s" ${shouldExist ? `exists` : `DOESN'T exist`}`, + async pagePath => { + const filePath = pagePathToFilePath[type](pagePath) + const exists = await new Promise(resolve => { + fs.stat(filePath, err => { + resolve(err === null) + }) + }) + + expect(exists).toBe(shouldExist) + } + ) +} + +beforeAll(async done => { + const gatsbyCleanProcess = spawn(gatsbyBin, [`clean`], { + stdio: [`inherit`, `inherit`, `inherit`, `inherit`], + env: { + ...process.env, + NODE_ENV: `production`, + }, + }) + + gatsbyCleanProcess.on(`exit`, exitCode => { + done() + }) +}) + +describe(`First run`, () => { + beforeAll(async done => { + const gatsbyProcess = spawn(gatsbyBin, [`build`], { + stdio: [`inherit`, `inherit`, `inherit`, `inherit`], + env: { + ...process.env, + NODE_ENV: `production`, + RUN_FOR_STALE_PAGE_ARTIFICATS: `1`, + }, + }) + + gatsbyProcess.on(`exit`, exitCode => { + done() + }) + }) + + describe(`Static Queries`, () => { + test(`are written correctly when inline`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/inline/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when imported`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/import/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when dynamically imported`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/dynamic/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly in jsx`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/jsx/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly in tsx`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/tsx/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly in typescript`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/typescript/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when nesting imports`, async () => { + const queries = [titleQuery, authorQuery, ...globalQueries] + const pagePath = `/import-import/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when nesting dynamic imports`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/dynamic-dynamic/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when nesting a dynamic import in a regular import`, async () => { + const queries = [titleQuery, authorQuery, ...globalQueries] + const pagePath = `/import-dynamic/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when nesting a regular import in a dynamic import`, async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/dynamic-import/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test("are written correctly with circular dependency", async () => { + const queries = [titleQuery, ...globalQueries] + const pagePath = `/circular-dep/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + + test(`are written correctly when using gatsby-browser`, async () => { + const queries = [...globalQueries] + const pagePath = `/gatsby-browser/` + + const { staticQueryHashes } = await readPageData(publicDir, pagePath) + + expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) + }) + }) + + const expectedPages = [`stale-pages/stable`, `stale-pages/only-in-first`] + const unexpectedPages = [`stale-pages/only-in-second`] + + describe(`html files`, () => { + const type = `html` + + describe(`should have expected html files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: expectedPages, + type, + shouldExist: true, + }) + }) + + describe(`shouldn't have unexpected html files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: unexpectedPages, + type, + shouldExist: false, + }) + }) + }) + + describe(`page-data files`, () => { + const type = `page-data` + + describe(`should have expected page-data files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: expectedPages, + type, + shouldExist: true, + }) + }) + + describe(`shouldn't have unexpected page-data files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: unexpectedPages, + type, + shouldExist: false, + }) + }) + }) +}) + +describe(`Second run`, () => { + const expectedPages = [`stale-pages/stable`, `stale-pages/only-in-second`] + const unexpectedPages = [`stale-pages/only-in-first`] + + beforeAll(async done => { + const gatsbyProcess = spawn(gatsbyBin, [`build`], { + stdio: [`inherit`, `inherit`, `inherit`, `inherit`], + env: { + ...process.env, + NODE_ENV: `production`, + RUN_FOR_STALE_PAGE_ARTIFICATS: `2`, + }, + }) + + gatsbyProcess.on(`exit`, exitCode => { + done() + }) + }) + + describe(`html files`, () => { + const type = `html` + + describe(`should have expected html files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: expectedPages, + type, + shouldExist: true, + }) + }) + + describe(`shouldn't have unexpected html files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: unexpectedPages, + type, + shouldExist: false, + }) + }) + }) + + describe(`page-data files`, () => { + const type = `page-data` + + describe(`should have expected page-data files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: expectedPages, + type, + shouldExist: true, + }) + }) + + describe(`shouldn't have unexpected page-data files`, () => { + assertFileExistenceForPagePaths({ + pagePaths: unexpectedPages, + type, + shouldExist: false, + }) + }) + }) +}) diff --git a/integration-tests/artifacts/__tests__/static-queries.js b/integration-tests/artifacts/__tests__/static-queries.js deleted file mode 100644 index 1ad14df959673..0000000000000 --- a/integration-tests/artifacts/__tests__/static-queries.js +++ /dev/null @@ -1,183 +0,0 @@ -const { spawn } = require(`child_process`) -const path = require(`path`) -const { murmurhash } = require(`babel-plugin-remove-graphql-queries`) -const { readPageData } = require(`gatsby/dist/utils/page-data`) -const { stripIgnoredCharacters } = require(`gatsby/graphql`) - -jest.setTimeout(100000) - -const publicDir = path.join(process.cwd(), `public`) - -const gatsbyBin = path.join(`node_modules`, `.bin`, `gatsby`) - -const titleQuery = ` - { - site { - siteMetadata { - title - } - } - } -` - -const authorQuery = ` - { - site { - siteMetadata { - author - } - } - } -` - -const githubQuery = ` - { - site { - siteMetadata { - github - } - } - } -` - -const moreInfoQuery = ` - { - site { - siteMetadata { - moreInfo - } - } - } -` - -function hashQuery(query) { - const text = stripIgnoredCharacters(query) - const hash = murmurhash(text, `abc`) - return String(hash) -} - -const globalQueries = [githubQuery, moreInfoQuery] - -describe(`Static Queries`, () => { - beforeAll(async done => { - const gatsbyProcess = spawn(gatsbyBin, [`build`], { - stdio: [`inherit`, `inherit`, `inherit`, `inherit`], - env: { - ...process.env, - NODE_ENV: `production`, - }, - }) - - gatsbyProcess.on(`exit`, exitCode => { - done() - }) - }) - - test(`are written correctly when inline`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/inline/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when imported`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/import/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when dynamically imported`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/dynamic/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly in jsx`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/jsx/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly in tsx`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/tsx/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly in typescript`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/typescript/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when nesting imports`, async () => { - const queries = [titleQuery, authorQuery, ...globalQueries] - const pagePath = `/import-import/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when nesting dynamic imports`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/dynamic-dynamic/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when nesting a dynamic import in a regular import`, async () => { - const queries = [titleQuery, authorQuery, ...globalQueries] - const pagePath = `/import-dynamic/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when nesting a regular import in a dynamic import`, async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/dynamic-import/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test("are written correctly with circular dependency", async () => { - const queries = [titleQuery, ...globalQueries] - const pagePath = `/circular-dep/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) - - test(`are written correctly when using gatsby-browser`, async () => { - const queries = [...globalQueries] - const pagePath = `/gatsby-browser/` - - const { staticQueryHashes } = await readPageData(publicDir, pagePath) - - expect(staticQueryHashes.sort()).toEqual(queries.map(hashQuery).sort()) - }) -}) diff --git a/integration-tests/artifacts/gatsby-node.js b/integration-tests/artifacts/gatsby-node.js new file mode 100644 index 0000000000000..b444e095ef492 --- /dev/null +++ b/integration-tests/artifacts/gatsby-node.js @@ -0,0 +1,24 @@ +const isFirstRun = process.env.RUN_FOR_STALE_PAGE_ARTIFICATS !== `2` + +exports.createPages = ({ actions }) => { + function createPageHelper(dummyId) { + actions.createPage({ + path: `/stale-pages/${dummyId}`, + component: require.resolve(`./src/templates/dummy`), + context: { + dummyId, + }, + }) + } + + // stable page that always gets created + createPageHelper(`stable`) + + if (isFirstRun) { + // page exists in first run, but not in second + createPageHelper(`only-in-first`) + } else { + // page exists only in second run + createPageHelper(`only-in-second`) + } +} diff --git a/integration-tests/artifacts/src/templates/dummy.js b/integration-tests/artifacts/src/templates/dummy.js new file mode 100644 index 0000000000000..16fb79ce5919e --- /dev/null +++ b/integration-tests/artifacts/src/templates/dummy.js @@ -0,0 +1,5 @@ +import React from "react" + +export default function DummyTemplate({ pageContext }) { + return

Dummy page for {pageContext.dummyId}

+} diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json index 3a43e216e881f..21888dbeacb16 100644 --- a/packages/gatsby/package.json +++ b/packages/gatsby/package.json @@ -18,6 +18,7 @@ "@babel/types": "^7.11.5", "@hapi/joi": "^15.1.1", "@mikaelkristiansson/domready": "^1.0.10", + "@nodelib/fs.walk": "^1.2.4", "@pieh/friendly-errors-webpack-plugin": "1.7.0-chalk-2", "@pmmmwh/react-refresh-webpack-plugin": "^0.4.1", "@reach/router": "^1.3.4", diff --git a/packages/gatsby/src/bootstrap/index.ts b/packages/gatsby/src/bootstrap/index.ts index 5e8ec23d599d7..3773e160d15de 100644 --- a/packages/gatsby/src/bootstrap/index.ts +++ b/packages/gatsby/src/bootstrap/index.ts @@ -16,6 +16,8 @@ import { Runner, createGraphQLRunner } from "./create-graphql-runner" import reporter from "gatsby-cli/lib/reporter" import { globalTracer } from "opentracing" import JestWorker from "jest-worker" +import { handleStalePageData } from "../utils/page-data" + const tracer = globalTracer() export async function bootstrap( @@ -54,6 +56,8 @@ export async function bootstrap( await createPagesStatefully(context) + await handleStalePageData() + await rebuildSchemaWithSitePage(context) await extractQueries(context) diff --git a/packages/gatsby/src/utils/page-data.ts b/packages/gatsby/src/utils/page-data.ts index c898f5a64faf1..75582fc36cd1d 100644 --- a/packages/gatsby/src/utils/page-data.ts +++ b/packages/gatsby/src/utils/page-data.ts @@ -1,4 +1,6 @@ +import { walkStream as fsWalkStream, Entry } from "@nodelib/fs.walk" import fs from "fs-extra" +import reporter from "gatsby-cli/lib/reporter" import path from "path" import { IGatsbyPage } from "../redux/types" import { websocketManager } from "./websocket-manager" @@ -178,3 +180,52 @@ export function enqueueFlush(): void { flush() } } + +export async function handleStalePageData(): Promise { + if (!(await fs.pathExists(`public/page-data`))) { + return + } + + // public directory might have stale page-data files from previous builds + // we get the list of those and compare against expected page-data files + // and remove ones that shouldn't be there anymore + + const activity = reporter.activityTimer(`Cleaning up stale page-data`) + activity.start() + + const pageDataFilesFromPreviousBuilds = await new Promise>( + (resolve, reject) => { + const results = new Set() + + const stream = fsWalkStream(`public/page-data`) + + stream.on(`data`, (data: Entry) => { + if (data.name === `page-data.json`) { + results.add(data.path) + } + }) + + stream.on(`error`, e => { + reject(e) + }) + + stream.on(`end`, () => resolve(results)) + } + ) + + const expectedPageDataFiles = new Set() + store.getState().pages.forEach(page => { + expectedPageDataFiles.add(getFilePath(`public`, page.path)) + }) + + const deletionPromises: Array> = [] + pageDataFilesFromPreviousBuilds.forEach(pageDataFilePath => { + if (!expectedPageDataFiles.has(pageDataFilePath)) { + deletionPromises.push(fs.remove(pageDataFilePath)) + } + }) + + await Promise.all(deletionPromises) + + activity.end() +} diff --git a/yarn.lock b/yarn.lock index 6da600dc79ef2..6dece1af1abac 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2780,7 +2780,7 @@ version "1.1.3" resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-1.1.3.tgz#2b5a3ab3f918cca48a8c754c08168e3f03eba61b" -"@nodelib/fs.walk@^1.2.3": +"@nodelib/fs.walk@^1.2.3", "@nodelib/fs.walk@^1.2.4": version "1.2.4" resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.4.tgz#011b9202a70a6366e436ca5c065844528ab04976" integrity sha512-1V9XOY4rDW0rehzbrcqAmHnz8e7SKvX27gh8Gt2WgB0+pdzdiLV83p72kZPU+jvMbS1qU5mauP2iOvO8rhmurQ==