Skip to content

Commit

Permalink
chore(gatsby): upgrade from lmdb-store to lmdb (#34576)
Browse files Browse the repository at this point in the history
* optimistic bump

* .clear -> clearSync

* tmp: patch lmdb so we can bundle it for engines

* adjust cache-resilience to import from lmdb

* bump lmdb to latest

* use clearKeptObjects()

* fix mocking os.platform() in tests causing loading of wrong lmdb binary

* ensure single instance of lmdb in a process - for gatsby serve case mostly

* add comment about lmdb webpack loader patch
  • Loading branch information
pieh committed Feb 15, 2022
1 parent 3df8583 commit 54d29c4
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 42 deletions.
2 changes: 1 addition & 1 deletion integration-tests/cache-resilience/gatsby-node.js
Expand Up @@ -3,7 +3,7 @@ const v8 = require(`v8`)
const glob = require(`glob`)
const path = require(`path`)
const _ = require(`lodash`)
const { open } = require(`lmdb-store`)
const { open } = require(`lmdb`)

const { saveState } = require(`gatsby/dist/redux/save-state`)

Expand Down
2 changes: 1 addition & 1 deletion packages/gatsby/package.json
Expand Up @@ -103,7 +103,7 @@
"joi": "^17.4.2",
"json-loader": "^0.5.7",
"latest-version": "5.1.0",
"lmdb-store": "^1.6.11",
"lmdb": "2.2.1",
"lodash": "^4.17.21",
"md5-file": "^5.0.0",
"meant": "^1.0.3",
Expand Down
11 changes: 4 additions & 7 deletions packages/gatsby/src/commands/__tests__/build-utils.ts
Expand Up @@ -5,7 +5,6 @@ import {
IStaticQueryResultState,
} from "../../redux/types"

const platformSpy = jest.spyOn(require(`os`), `platform`)
interface IMinimalStateSliceForTest {
html: IGatsbyState["html"]
pages: IGatsbyState["pages"]
Expand Down Expand Up @@ -245,16 +244,13 @@ describe(`calcDirtyHtmlFiles`, () => {
})

describe(`onCreatePage + deletePage + createPage that change path casing of a page`, () => {
afterAll(() => {
platformSpy.mockRestore()
})

it(`linux (case sensitive file system)`, () => {
let isolatedCalcDirtyHtmlFiles
jest.isolateModules(() => {
platformSpy.mockImplementation(() => `linux`)
process.env.TEST_FORCE_CASE_FS = `SENSITIVE`
isolatedCalcDirtyHtmlFiles =
require(`../build-utils`).calcDirtyHtmlFiles
delete process.env.TEST_FORCE_CASE_FS
})

const state = generateStateToTestHelper({
Expand All @@ -280,9 +276,10 @@ describe(`calcDirtyHtmlFiles`, () => {
it(`windows / mac (case insensitive file system)`, () => {
let isolatedCalcDirtyHtmlFiles
jest.isolateModules(() => {
platformSpy.mockImplementation(() => `win32`)
process.env.TEST_FORCE_CASE_FS = `INSENSITIVE`
isolatedCalcDirtyHtmlFiles =
require(`../build-utils`).calcDirtyHtmlFiles
delete process.env.TEST_FORCE_CASE_FS
})

const state = generateStateToTestHelper({
Expand Down
4 changes: 3 additions & 1 deletion packages/gatsby/src/commands/build-utils.ts
Expand Up @@ -65,7 +65,9 @@ export const removePageFiles = async (
})
}

const FSisCaseInsensitive = platform() === `win32` || platform() === `darwin`
const FSisCaseInsensitive = process.env.TEST_FORCE_CASE_FS
? process.env.TEST_FORCE_CASE_FS === `INSENSITIVE`
: platform() === `win32` || platform() === `darwin`
function normalizePagePath(path: string): string {
if (path === `/`) {
return `/`
Expand Down
22 changes: 20 additions & 2 deletions packages/gatsby/src/datastore/common/iterable.ts
@@ -1,3 +1,9 @@
// Ambient augmentation of the "lmdb" module typings so that the
// `clearKeptObjects` import below type-checks.
declare module "lmdb" {
// currently lmdb doesn't have typings for this export
export function clearKeptObjects(): void
}

import { clearKeptObjects } from "lmdb"
/**
* Wrapper for any iterable providing chainable interface and convenience methods
* similar to array.
Expand All @@ -10,10 +16,22 @@
export class GatsbyIterable<T> {
constructor(private source: Iterable<T> | (() => Iterable<T>)) {}

[Symbol.iterator](): Iterator<T> {
*[Symbol.iterator](): Iterator<T> {
const source =
typeof this.source === `function` ? this.source() : this.source
return source[Symbol.iterator]()

let i = 0
for (const val of source) {
yield val

// clearKeptObjects just makes it possible for WeakRefs used in any way during the current
// sync execution tick to be garbage collected. It doesn't force GC; it just removes
// internal strong references in V8.
// see https://github.com/kriszyp/weak-lru-cache/issues/4
if (++i % 100 === 0) {
clearKeptObjects()
}
}
}

concat<U>(other: Iterable<U>): GatsbyIterable<T | U> {
Expand Down
38 changes: 31 additions & 7 deletions packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
@@ -1,4 +1,4 @@
import { RootDatabase, open, ArrayLikeIterable } from "lmdb-store"
import { RootDatabase, open, ArrayLikeIterable } from "lmdb"
// import { performance } from "perf_hooks"
import { ActionsUnion, IGatsbyNode } from "../../redux/types"
import { updateNodes } from "./updates/nodes"
Expand Down Expand Up @@ -61,8 +61,31 @@ function getRootDb(): RootDatabase {
return rootDb
}

/* eslint-disable @typescript-eslint/no-namespace */
// Type-only augmentation: declares the optional `__GATSBY_OPEN_LMDBS` property,
// a Map of already opened database sets. `getDatabases` below keys it by
// `fullDbPath`.
declare global {
namespace NodeJS {
// eslint-disable-next-line @typescript-eslint/naming-convention
interface Global {
__GATSBY_OPEN_LMDBS?: Map<string, ILmdbDatabases>
}
}
}

function getDatabases(): ILmdbDatabases {
if (!databases) {
// __GATSBY_OPEN_LMDBS tracks whether we have already opened a given db in this process.
// In the `gatsby serve` case we might try to open it twice - once for engines
// and a second time to get access to `SitePage` nodes (to power the trailing slashes
// redirect middleware). This ensures there is a single instance within a process.
// Using more instances seems to cause weird random errors.
if (!globalThis.__GATSBY_OPEN_LMDBS) {
globalThis.__GATSBY_OPEN_LMDBS = new Map()
}
databases = globalThis.__GATSBY_OPEN_LMDBS.get(fullDbPath)
if (databases) {
return databases
}

const rootDb = getRootDb()
databases = {
nodes: rootDb.openDB({
Expand All @@ -86,6 +109,7 @@ function getDatabases(): ILmdbDatabases {
// dupSort: true
}),
}
globalThis.__GATSBY_OPEN_LMDBS.set(fullDbPath, databases)
}
return databases
}
Expand Down Expand Up @@ -184,10 +208,10 @@ function updateDataStore(action: ActionsUnion): void {
const dbs = getDatabases()
// Force sync commit
dbs.nodes.transactionSync(() => {
dbs.nodes.clear()
dbs.nodesByType.clear()
dbs.metadata.clear()
dbs.indexes.clear()
dbs.nodes.clearSync()
dbs.nodesByType.clearSync()
dbs.metadata.clearSync()
dbs.indexes.clearSync()
})
break
}
Expand Down Expand Up @@ -229,8 +253,8 @@ function updateDataStore(action: ActionsUnion): void {
function clearIndexes(): void {
const dbs = getDatabases()
dbs.nodes.transactionSync(() => {
dbs.metadata.clear()
dbs.indexes.clear()
dbs.metadata.clearSync()
dbs.indexes.clearSync()
})
}

Expand Down
2 changes: 1 addition & 1 deletion packages/gatsby/src/datastore/lmdb/updates/nodes.ts
@@ -1,5 +1,5 @@
import { ActionsUnion, IGatsbyNode } from "../../../redux/types"
import { Database } from "lmdb-store"
import type { Database } from "lmdb"

type NodeId = string

Expand Down
2 changes: 1 addition & 1 deletion packages/gatsby/src/datastore/types.ts
@@ -1,4 +1,4 @@
import { Database } from "lmdb-store"
import { Database } from "lmdb"
import { IGatsbyNode } from "../redux/types"
import { GatsbyGraphQLType } from "../../index"
import { IInputQuery } from "./common/query"
Expand Down
15 changes: 15 additions & 0 deletions packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
Expand Up @@ -70,6 +70,21 @@ export async function createGraphqlEngineBundle(
],
module: {
rules: [
{
test: require.resolve(`lmdb`),
parser: { amd: false },
use: [
{
loader: require.resolve(`@vercel/webpack-asset-relocator-loader`),
options: {
outputAssetBase: `assets`,
},
},
{
loader: require.resolve(`./lmdb-bundling-patch`),
},
],
},
{
test: /\.m?js$/,
type: `javascript/auto`,
Expand Down
46 changes: 46 additions & 0 deletions packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts
@@ -0,0 +1,46 @@
import { createRequireFromPath } from "gatsby-core-utils"

// This is a hacky webpack loader that does string replacements to
// allow lmdb@2 to be bundled by webpack for engines.
// Currently `@vercel/webpack-asset-relocator-loader` doesn't handle
// the way lmdb loads its binaries and dictionary file
// (it can't statically analyze it). So we perform a few localized changes
// and replace dynamic values with hardcoded ones to allow
// asset-relocator to pick those assets up and handle them.
//
// Because lmdb code can diverge, we also pin its version in gatsby
// dependencies and will have to bump it manually (with renovate most likely).
//
// To solve this upstream, a few things would need to change:
// - https://github.com/DoctorEvidence/lmdb-js/blob/544b3fda402f24a70a0e946921e4c9134c5adf85/node-index.js#L14-L16
// - https://github.com/DoctorEvidence/lmdb-js/blob/544b3fda402f24a70a0e946921e4c9134c5adf85/open.js#L77
// Reliance on `import.meta.url` + usage of `.replace` is what seems to cause problems currently.

/**
 * Webpack loader that rewrites lmdb's source so that its native binary and
 * its dictionary file are referenced through static `require` /
 * `require.resolve` calls with hardcoded paths.
 *
 * @param source - Source text of the module handed to the loader.
 * @returns The patched source, or `source` unchanged when the location of the
 *   lmdb binary could not be determined.
 */
export default function (source: string): string {
  let lmdbBinaryLocation: string
  try {
    const lmdbRequire = createRequireFromPath(require.resolve(`lmdb`))
    const nodeGypBuild = lmdbRequire(`node-gyp-build`)
    const path = require(`path`)

    // Strip only a trailing `dist` directory, with either path separator.
    // A plain string pattern (`/dist`) would remove the first `/dist` found
    // anywhere in the path (e.g. `/home/me/dist/site/...` or `/distro/...`)
    // and would never match Windows-style `\dist`.
    lmdbBinaryLocation = nodeGypBuild.path(
      path.dirname(require.resolve(`lmdb`)).replace(/[\\/]dist$/, ``)
    )
  } catch (e) {
    // Deliberate best effort: when the binary can't be located, leave the
    // module untouched instead of failing the whole bundling step.
    return source
  }

  const binaryRequire = `require(${JSON.stringify(lmdbBinaryLocation)})`

  return source
    .replace(
      `require$1('node-gyp-build')(dirName)`,
      // Replacer function (not a string) so `$`-sequences that may appear in
      // the file system path are inserted literally.
      () => binaryRequire
    )
    .replace(
      `require$2.resolve('./dict/dict.txt')`,
      `require.resolve('../dict/dict.txt')`
    )
    .replace(
      /fs\.readFileSync\(new URL\('\.\/dict\/dict\.txt',\s*\(typeof\s*document\s*===\s*'undefined'\s*\?\s*new\s*\(require\('u'\s*\+\s*'rl'\)\.URL\)\s*\('file:'\s*\+\s*__filename\).href\s*:\s*\(document\.currentScript\s*&&\s*document\.currentScript\.src\s*\|\|\s*new URL\('index\.cjs',\s*document\.baseURI\)\.href\)\)\.replace\(\/dist\[\\\\\\\/\]index\.cjs\$\/,\s*''\)\)\)/g,
      `fs.readFileSync(require.resolve('../dict/dict.txt'))`
    )
}
2 changes: 1 addition & 1 deletion packages/gatsby/src/utils/cache-lmdb.ts
@@ -1,4 +1,4 @@
import { open, RootDatabase, Database, DatabaseOptions } from "lmdb-store"
import { open, RootDatabase, Database, DatabaseOptions } from "lmdb"
import fs from "fs-extra"
import path from "path"

Expand Down
39 changes: 19 additions & 20 deletions yarn.lock
Expand Up @@ -14583,17 +14583,16 @@ livereload-js@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/livereload-js/-/livereload-js-2.3.0.tgz#c3ab22e8aaf5bf3505d80d098cbad67726548c9a"

lmdb-store@^1.6.11:
version "1.6.11"
resolved "https://registry.yarnpkg.com/lmdb-store/-/lmdb-store-1.6.11.tgz#801da597af8c7a01c81f87d5cc7a7497e381236d"
integrity sha512-hIvoGmHGsFhb2VRCmfhodA/837ULtJBwRHSHKIzhMB7WtPH6BRLPsvXp1MwD3avqGzuZfMyZDUp3tccLvr721Q==
lmdb@2.2.1:
version "2.2.1"
resolved "https://registry.yarnpkg.com/lmdb/-/lmdb-2.2.1.tgz#b7fd22ed2268ab74aa71108b793678314a7b94bb"
integrity sha512-tUlIjyJvbd4mqdotI9Xe+3PZt/jqPx70VKFDrKMYu09MtBWOT3y2PbuTajX+bJFDjbgLkQC0cTx2n6dithp/zQ==
dependencies:
msgpackr "^1.5.4"
nan "^2.14.2"
node-gyp-build "^4.2.3"
ordered-binary "^1.0.0"
weak-lru-cache "^1.0.0"
optionalDependencies:
msgpackr "^1.4.7"
ordered-binary "^1.2.4"
weak-lru-cache "^1.2.2"

load-bmfont@^1.3.1, load-bmfont@^1.4.0:
version "1.4.0"
Expand Down Expand Up @@ -16222,10 +16221,10 @@ msgpackr-extract@^1.0.14:
nan "^2.14.2"
node-gyp-build "^4.2.3"

msgpackr@^1.4.7:
version "1.4.7"
resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.4.7.tgz#d802ade841e7d2e873000b491cdda6574a3d5748"
integrity sha512-bhC8Ed1au3L3oHaR/fe4lk4w7PLGFcWQ5XY/Tk9N6tzDRz8YndjCG68TD8zcvYZoxNtw767eF/7VpaTpU9kf9w==
msgpackr@^1.5.4:
version "1.5.4"
resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.5.4.tgz#2b6ea6cb7d79c0ad98fc76c68163c48eda50cf0d"
integrity sha512-Z7w5Jg+2Q9z9gJxeM68d7tSuWZZGnFIRhZnyqcZCa/1dKkhOCNvR1TUV3zzJ3+vj78vlwKRzUgVDlW4jiSOeDA==
optionalDependencies:
msgpackr-extract "^1.0.14"

Expand Down Expand Up @@ -17121,10 +17120,10 @@ ora@^5.4.1:
strip-ansi "^6.0.0"
wcwidth "^1.0.1"

ordered-binary@^1.0.0:
version "1.1.3"
resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.1.3.tgz#11dbc0a4cb7f8248183b9845e031b443be82571e"
integrity sha512-tDTls+KllrZKJrqRXUYJtIcWIyoQycP7cVN7kzNNnhHKF2bMKHflcAQK+pF2Eb1iVaQodHxqZQr0yv4HWLGBhQ==
ordered-binary@^1.2.4:
version "1.2.4"
resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.2.4.tgz#51d3a03af078a0bdba6c7bc8f4fedd1f5d45d83e"
integrity sha512-A/csN0d3n+igxBPfUrjbV5GC69LWj2pjZzAAeeHXLukQ4+fytfP4T1Lg0ju7MSPSwq7KtHkGaiwO8URZN5IpLg==

ordered-read-streams@^1.0.0:
version "1.0.1"
Expand Down Expand Up @@ -24527,10 +24526,10 @@ wcwidth@^1.0.0, wcwidth@^1.0.1:
dependencies:
defaults "^1.0.3"

weak-lru-cache@^1.0.0:
version "1.1.2"
resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.1.2.tgz#a909a97372aabdfbfe3eb33580af255b3b198834"
integrity sha512-Bi5ae8Bev3YulgtLTafpmHmvl3vGbanRkv+qqA2AX8c3qj/MUdvSuaHq7ukDYBcMDINIaRPTPEkXSNCqqWivuA==
weak-lru-cache@^1.2.2:
version "1.2.2"
resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.2.2.tgz#fdbb6741f36bae9540d12f480ce8254060dccd19"
integrity sha512-DEAoo25RfSYMuTGc9vPJzZcZullwIqRDSI9LOy+fkCJPi6hykCnfKaXTuPBDuXAUcqHXyOgFtHNp/kB2FjYHbw==

web-namespaces@^1.0.0:
version "1.1.2"
Expand Down

0 comments on commit 54d29c4

Please sign in to comment.