Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: mongodb async context fix #3657

Closed
wants to merge 14 commits into from
8 changes: 4 additions & 4 deletions .tav.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,22 +143,22 @@ mongodb-core:
mongodb-3:
name: mongodb
versions: '>=3.3 <4'
commands: node test/instrumentation/modules/mongodb.test.js
commands: node test/instrumentation/modules/mongodb/mongodb.test.js
mongodb-4:
name: mongodb
versions: '>=4 <5'
node: '>=12.9.0'
commands: node test/instrumentation/modules/mongodb.test.js
commands: node test/instrumentation/modules/mongodb/mongodb.test.js
mongodb-5:
name: mongodb
versions: '>=5 <6'
node: '>=14.20.1'
commands: node test/instrumentation/modules/mongodb.test.js
commands: node test/instrumentation/modules/mongodb/mongodb.test.js
mongodb:
name: mongodb
versions: '>=6 <7'
node: '>=15.0.0'
commands: node test/instrumentation/modules/mongodb.test.js
commands: node test/instrumentation/modules/mongodb/mongodb.test.js

# Bluebird is effectively deprecated (https://github.com/petkaantonov/bluebird#%EF%B8%8Fnote%EF%B8%8F).
# Testing the full set of supported bluebird releases (`>=2 <4`) is currently
Expand Down
92 changes: 54 additions & 38 deletions lib/instrumentation/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ var MODULES = [
['@elastic/elasticsearch', '@elastic/elasticsearch-canary'],
'@opentelemetry/api',
'@opentelemetry/sdk-metrics',
'@node-redis/client/dist/lib/client',
'@node-redis/client/dist/lib/client/commands-queue',
'@redis/client/dist/lib/client',
'@redis/client/dist/lib/client/commands-queue',
// XXX: check what is the path for theses ones
'@node-redis/client/dist/lib/client/index.js',
'@node-redis/client/dist/lib/client/commands-queue.js',
'@redis/client/dist/lib/client/index.js',
'@redis/client/dist/lib/client/commands-queue.js',
'apollo-server-core',
'aws-sdk',
'bluebird',
Expand All @@ -73,6 +74,7 @@ var MODULES = [
'mimic-response',
'mongodb-core',
'mongodb',
'mongodb/lib/cmap/connection_pool.js',
'mysql',
'mysql2',
'next/dist/server/api-utils/node',
Expand Down Expand Up @@ -134,6 +136,12 @@ function Instrumentation(agent) {
// because the array is given to the hook(...) call.
this._patches = new NamedArray();

// NOTE: since RITM is using internals it will ask for
// the version for each sub-module. As a consequence agent will
// do many unnecesary reads to get the package version.
// A cache will avoid these reads
this._versionsCache = new Map();

for (let modName of MODULES) {
if (!Array.isArray(modName)) modName = [modName];
const pathName = modName[0];
Expand All @@ -142,7 +150,9 @@ function Instrumentation(agent) {
// Lazy require so that we don't have to use `require.resolve` which
// would fail in combination with Webpack. For more info see:
// https://github.com/elastic/apm-agent-nodejs/pull/957
return require(`./modules/${pathName}.js`)(...args);
const modExt = pathName.endsWith('.js') ? '' : '.js';
david-luna marked this conversation as resolved.
Show resolved Hide resolved
const modPath = `./modules/${pathName}${modExt}`;
return require(modPath)(...args);
});
}

Expand Down Expand Up @@ -358,35 +368,37 @@ Instrumentation.prototype._startHook = function () {

this._agent.logger.debug('adding hooks to Node.js module loader');

this._ritmHook = new RitmHook(this._patches.keys, function (
exports,
name,
basedir,
) {
const enabled = self._isModuleEnabled(name);
var pkg, version;

const isHandlingLambda =
self._lambdaHandlerInfo && self._lambdaHandlerInfo.module === name;

if (!isHandlingLambda && basedir) {
pkg = path.join(basedir, 'package.json');
try {
version = JSON.parse(fs.readFileSync(pkg)).version;
} catch (e) {
self._agent.logger.debug(
'could not shim %s module: %s',
name,
e.message,
);
return exports;
this._ritmHook = new RitmHook(
this._patches.keys,
// Need to enable internals so we hook into sub-modules even
// if the main module is hooked
// see: https://github.com/elastic/apm-agent-nodejs/pull/3657/files#r1344193251
{ internals: true },
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewer: check the comment and give feedback please. Was okay to pass the new option? what about other instrumentations.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We weren't using internals: true before the changes adding ESM support:

-  this._hook = new Hook(this._patches.keys, function (exports, name, basedir) {
+  this._ritmHook = new RitmHook(this._patches.keys, function (exports, name, basedir) {

so I don't think this is what internals is about. I'm not positive, however.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

internals: true isn't needed if the sub-module path, e.g. @redis/client/dist/lib/client, is an entry in the first argument to RitmHook -- i.e. if it is on the explicit list of modules to hook into. If I add these debug-prints to RITM:

--- /Users/trentm/el/require-in-the-middle/index.js	2023-07-13 15:32:26
+++ ./node_modules/require-in-the-middle/index.js	2023-10-03 09:26:14
@@ -126,6 +126,7 @@
   const self = this
   const patching = new Set()
   const internals = options ? options.internals === true : false
+  console.log('XXX ritm: internals=%s', internals);
   const hasWhitelist = Array.isArray(modules)

   debug('registering require hook')
@@ -204,6 +205,7 @@
       }
       moduleName = filename
     } else if (hasWhitelist === true && modules.includes(filename)) {
+      console.log('XXX ritm abspath in allowlist: hasWhitelist=%s modules.includes(filename)=%s filename=%s', hasWhitelist, modules.includes(filename), filename);
       // whitelist includes the absolute path to the file including extension
       const parsedPath = path.parse(filename)
       moduleName = parsedPath.name
@@ -227,6 +229,7 @@
       if (hasWhitelist === true && modules.includes(moduleName) === false) {
         if (modules.includes(fullModuleName) === false) return exports // abort if module name isn't on whitelist

+        console.log('XXX ritm allowlisted sub-module: moduleName=%s fullModuleName=%s filename=%s', moduleName, fullModuleName, filename)
         // if we get to this point, it means that we're requiring a whitelisted sub-module
         moduleName = fullModuleName
       } else {
diff --git a/lib/instrumentation/index.js b/lib/instrumentation/index.js
index b90152f7..29e4bbb7 100644
--- a/lib/instrumentation/index.js
+++ b/lib/instrumentation/index.js
@@ -363,6 +363,7 @@ Instrumentation.prototype._startHook = function () {
     name,
     basedir,
   ) {
+    console.log('XXX RitmHook called:', name, basedir);
     const enabled = self._isModuleEnabled(name);
     var pkg, version;

and run examples/trace-redis.js:

% node examples/trace-redis.js
{"log.level":"info","@timestamp":"2023-10-03T16:30:11.754Z","log":{"logger":"elastic-apm-node"},"agentVersion":"4.0.0","env":{"pid":19696,"proctitle":"node","os":"darwin 22.6.0","arch":"x64","host":"pink.local","timezone":"UTC-0700","runtime":"Node.js v16.20.2"},"config":{"serviceName":{"source":"start","value":"example-trace-redis4","commonName":"service_name"},"serviceVersion":{"source":"default","value":"4.0.0","commonName":"service_version"},"serverUrl":{"source":"default","value":"http://127.0.0.1:8200/","commonName":"server_url"},"logLevel":{"source":"default","value":"info","commonName":"log_level"},"spanCompressionEnabled":{"source":"start","value":false}},"activationMethod":"require","ecs":{"version":"1.6.0"},"message":"Elastic APM Node.js Agent v4.0.0"}
XXX ritm: internals=false
XXX ritm allowlisted sub-module: moduleName=@redis/client fullModuleName=@redis/client/dist/lib/client/commands-queue filename=/Users/trentm/el/apm-agent-nodejs/node_modules/@redis/client/dist/lib/client/commands-queue.js
XXX RitmHook called: @redis/client/dist/lib/client/commands-queue /Users/trentm/el/apm-agent-nodejs/node_modules/@redis/client
XXX RitmHook called: generic-pool /Users/trentm/el/apm-agent-nodejs/node_modules/generic-pool
XXX ritm allowlisted sub-module: moduleName=@redis/client fullModuleName=@redis/client/dist/lib/client filename=/Users/trentm/el/apm-agent-nodejs/node_modules/@redis/client/dist/lib/client/index.js
XXX RitmHook called: @redis/client/dist/lib/client /Users/trentm/el/apm-agent-nodejs/node_modules/@redis/client
XXX RitmHook called: redis /Users/trentm/el/apm-agent-nodejs/node_modules/redis
PING res:  PONG
SET res:  OK
GET res:  bar
MULTI res:  [ 'OK', 'eggs' ]

The RITM opts.internals value is only used later for internal module paths that are not on the explicitly set of modules to hook.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not get those logs for mongodb and I think I know why. The last log inside ./node_modules/require-in-the-middle/index.js is inside a conditional

if (hasWhitelist === true && modules.includes(moduleName) === false) {
    if (modules.includes(fullModuleName) === false) return exports // abort if module name isn't on whitelist

    console.log('XXX ritm allowlisted sub-module: moduleName=%s fullModuleName=%s filename=%s', moduleName, fullModuleName, filename)
    // more code here ...     
}

In my case

  • moduleName === 'mongodb'
  • fullModuleName === 'mongodb/lib/cmap/connection_pool'

The expression allows to pass sub-modules only if the root module is not in the whitelist. I this case the agent is instrumenting the main module and one sub-module. I think it works on @redis because its instrumention only sub-modules.

I see two options here. Let me know what you think

  • change the instrumentation and go for sub-modules (need to check if feasible)
  • make use of the internals option.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • make use of the internals option.

I'd prefer to avoid this. It will mean that we will be calling our onRequire hook function for every require'd file under each of the modules we instrument (every lib file in ioredis, in express, in fastify, etc.). There may be a perf impact of this.


Option 3: Remove that limitation in RITM.

I think this is a bug in RITM that you cannot do this. Having 'mongodb' on the list of modules to patch shouldn't mean that you cannot patch mongodb/lib/cmap/connection_pool. I'll have to stare at this longer... the if/else's in RITM make my eyes swim a bit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I still think this is a RITM bug. We should open an issue about it. However, I don't want to fix that right now.
  2. I've changed my mind. I think we should use the internals: true and name our patches of sub-module files with the trailing .js -- i.e. mongodb/lib/cmap/connection_pool.js instead of without the .js. I have started implementing this (along with implementing Allow Logical Grouping of Instrumentation by Name #2992 so that disableInstrumentations: 'mongodb' would suffice to disable both instrumentation of mongodb and mongodb/lib/cmap/connection_pool.js.

I haven't finished this yet. There is also a potential hiccup in maintaining full support for https://www.elastic.co/guide/en/apm/agent/nodejs/current/agent-api.html#apm-add-patch I think it'll be fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. That's okay. There is no need to rush in a fix there and we still have to check what's the impact since no one hasn't noticed.
  2. I've done the changes to have sub-modules with the .js extension. I've noticed our Hook is reading the package.json file each time is called so enabling internals will do many reads. To avoid this I made use of a Map to cache versions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR solves our use case for instrumenting module and sub-module at the same time. which is what we need for mongodb.

We still have to look at RITM but since the issue is only reported by us I think we have no rush.

function (exports, name, basedir) {
const enabled = self._isModuleEnabled(name);
var version;

const isHandlingLambda =
self._lambdaHandlerInfo && self._lambdaHandlerInfo.module === name;

if (!isHandlingLambda && basedir) {
try {
version = self._getPackageVersion(name, basedir);
} catch (e) {
self._agent.logger.debug(
'could not shim %s module: %s',
name,
e.message,
);
return exports;
}
} else {
version = process.versions.node;
}
} else {
version = process.versions.node;
}

return self._patchModule(exports, name, version, enabled, false);
});
return self._patchModule(exports, name, version, enabled, false);
},
);

this._iitmHook = IitmHook(
Object.keys(IITM_MODULES),
Expand All @@ -412,22 +424,26 @@ Instrumentation.prototype._startHook = function () {
};

Instrumentation.prototype._getPackageVersion = function (modName, modBaseDir) {
const modPath = path.join(modBaseDir, 'package.json');
if (this._versionsCache.has(modPath)) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewer: both RITM and IITM hooks use this function and will get cached result if they request many times the version for the same pacakge.json file

return this._versionsCache.get(modPath);
}

try {
const version = JSON.parse(
fs.readFileSync(path.join(modBaseDir, 'package.json')),
).version;
const version = JSON.parse(fs.readFileSync(modPath)).version;
if (typeof version === 'string') {
this._versionsCache.set(modPath, version);
return version;
} else {
return undefined;
}
} catch (err) {
this._agent.logger.debug(
{ modName, modBaseDir, err },
'could not load package version',
);
return undefined;
}

this._versionsCache.set(modPath, undefined);
return undefined;
};

/**
Expand Down
39 changes: 39 additions & 0 deletions lib/instrumentation/modules/mongodb/lib/cmap/connection_pool.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and other contributors where applicable.
* Licensed under the BSD 2-Clause License; you may not use this file except in
* compliance with the BSD 2-Clause License.
*/

'use strict';

const { AsyncResource } = require('async_hooks');

const semver = require('semver');

module.exports = (mod, agent, { version, enabled }) => {
if (!enabled) return mod;
if (!semver.satisfies(version, '>=3.3 <7.0')) {
agent.logger.debug(
'mongodb version %s not instrumented (mongodb <3.3 is instrumented via mongodb-core)',
version,
);
return mod;
}

if (mod.ConnectionPool) {
class ConnectionPoolTraced extends mod.ConnectionPool {
checkOut(callback) {
return super.checkOut(AsyncResource.bind(callback));
}
}

Object.defineProperty(mod, 'ConnectionPool', {
enumerable: true,
get: function () {
return ConnectionPoolTraced;
},
});

return mod;
}
};
3 changes: 2 additions & 1 deletion test/_is_mongodb_incompat.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
'use strict';

var semver = require('semver');
const { safeGetPackageVersion } = require('./_utils');

/**
* Return whether the current 'mongodb' version is incompatible with the
Expand All @@ -26,7 +27,7 @@ var semver = require('semver');
*/
function isMongodbIncompat() {
const nodeVer = process.version;
const mongodbVer = require('mongodb/package.json').version;
const mongodbVer = safeGetPackageVersion('mongodb');
const msg = `mongodb@${mongodbVer} is incompatible with node@${nodeVer}`;

if (semver.satisfies(mongodbVer, '4.x')) {
Expand Down
Loading
Loading