Skip to content
This repository was archived by the owner on Feb 25, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,298 changes: 1,901 additions & 2,397 deletions package-lock.json

Large diffs are not rendered by default.

18 changes: 10 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@adobe/helix-data-embed",
"version": "2.2.1",
"version": "2.2.1-hedy",
"private": true,
"description": "Helix Data Embed",
"main": "src/index.js",
Expand All @@ -11,14 +11,15 @@
"lint": "./node_modules/.bin/eslint .",
"semantic-release": "semantic-release",
"commit": "git-cz",
"build": "wsk-builder -v",
"deploy": "wsk-builder -v --deploy --test=/_status_check/healthcheck.json",
"deploy-sequences": "wsk-builder --no-build -no-hints -l latest -l major -l minor",
"deploy-ci": "wsk-builder -v --deploy --test=/_status_check/healthcheck.json --pkgVersion=ci$CIRCLE_BUILD_NUM -l ci"
"build": "hedy -v",
"deploy": "hedy -v --deploy --test=/_status_check/healthcheck.json",
"deploy-sequences": "hedy --no-build -no-hints -l latest -l major -l minor",
"deploy-ci": "hedy -v --deploy --test=/_status_check/healthcheck.json --pkgVersion=ci$CIRCLE_BUILD_NUM -l ci"
},
"wsk": {
"namespace": "helix",
"name": "helix-services/data-embed@${version}"
"name": "helix-services/data-embed@${version}",
"memory": 512
},
"repository": {
"type": "git",
Expand All @@ -31,22 +32,23 @@
},
"homepage": "https://github.com/adobe/helix-data-embed#readme",
"dependencies": {
"@adobe/helix-fetch": "1.9.2",
"@adobe/helix-epsagon": "1.5.5",
"@adobe/helix-fetch": "1.9.2",
"@adobe/helix-onedrive-support": "3.1.6",
"@adobe/helix-shared": "7.18.0",
"@adobe/helix-status": "9.0.0",
"@adobe/openwhisk-action-logger": "2.3.1",
"@adobe/openwhisk-action-utils": "4.4.0",
"@flighter/a1-notation": "1.0.6",
"googleapis": "66.0.0",
"node-fetch": "2.6.1",
"rss-parser": "3.10.0"
},
"devDependencies": {
"@adobe/eslint-config-helix": "1.1.3",
"@adobe/helix-deploy": "1.14.0",
"@adobe/helix-ops": "1.12.3",
"@adobe/helix-testutils": "0.4.2",
"@adobe/openwhisk-action-builder": "2.15.1",
"@semantic-release/changelog": "5.0.1",
"@semantic-release/exec": "5.0.0",
"@semantic-release/git": "9.0.0",
Expand Down
46 changes: 15 additions & 31 deletions src/data-source.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,58 +9,42 @@
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const querystring = require('querystring');

/**
* Analyses the params and extracts the data source, which is specified by a `src` param.
* For backward compatibility, the source can also be added as path, either escaped or unescaped.
*
* @param {string} params the openwhisk action params
* @return {URL} the extracted data source or null
* @param {Request} req The request
* @param {HEDYContext} context the universal deploy context
*/
function dataSource(params) {
const { __ow_path: path = '', src = '' } = params;
function dataSource(req, context) {
const { pathInfo: { suffix } = {} } = context;
const { searchParams } = new URL(req.url);
const src = searchParams.get('src') || '';
let url = null;
if (!path) {
if (!suffix) {
try {
url = new URL(src);
} catch (e) {
return null;
}

// expect the _ow_path to start with /https:/ or /https%3a%2f
// expect the suffix to start with /https:/ or /https%3a%2f
// the escaping done by runtime is inconsistent, the : may be decoded
} else if (path.match(/^\/https(:|%3A)%2F/)) {
url = new URL(decodeURIComponent(path.substring(1)
} else if (suffix.match(/^\/https(:|%3A)%2F/)) {
url = new URL(decodeURIComponent(suffix.substring(1)
.replace(/^https(:|%3A)%2F([^%])/, 'https://$2')));
} else if (!path.startsWith('/https:/')) {
} else if (!suffix.startsWith('/https:/')) {
return null;
} else {
url = new URL(path.substring(1)
url = new URL(suffix.substring(1)
// workaround: Adobe I/O Runtime collapses consecutive slashes in URLs (https:// arrives as https:/)
.replace(/^https:\/\/?([^/])/, 'https://$1'));
}

if (!params.__ow_query) {
// reconstruct __ow_query
const q = {};
Object.keys(params)
.filter((key) => !/^[A-Z]+_[A-Z]+/.test(key))
.filter((key) => key !== 'api')
.filter((key) => key !== 'src')
.filter((key) => !/^__ow_/.test(key))
.forEach((key) => {
q[key] = params[key];
// don't append querybuilder keys to source or if a src param was given
if (!key.startsWith('hlx_') && !params.src) {
url.searchParams.append(key, params[key]);
}
});
// eslint-disable-next-line no-param-reassign
params.__ow_query = querystring.stringify(q);
} else {
// else add it to the url
Object.entries(querystring.parse(params.__ow_query))
if (!src) {
// add query params to data source url
Array.from(searchParams.entries())
.filter(([key]) => (!key.startsWith('hlx_')))
.forEach(([key, value]) => {
url.searchParams.append(key, value);
Expand Down
9 changes: 5 additions & 4 deletions src/embed.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ function hasParams(list, params) {
/**
* Returns the data representation of the resource addressed by url.
* @param {URL} url The url of the resource
* @param {object} params The action params
* @param {Object} params additional params
* @param {object} env The action environment
* @param {Logger} log logger
* @returns {object} an action response with the body containing the data.
*/
function embed(url, params, log) {
function embed(url, params, env, log) {
const candidates = matchers
.filter((candidate) => hasParams(candidate.required, params));
.filter((candidate) => hasParams(candidate.required, env));

const matching = candidates.find((candidate) => candidate.accept(url));

Expand All @@ -50,7 +51,7 @@ function embed(url, params, log) {
}
log.info(`found handler for ${url}: ${matching.name}`);

return matching.extract(url, params, log);
return matching.extract(url, params, env, log);
}

module.exports = embed;
93 changes: 56 additions & 37 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,62 +9,81 @@
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const { Response } = require('node-fetch');
const { wrap } = require('@adobe/openwhisk-action-utils');
const { logger } = require('@adobe/openwhisk-action-logger');
const { wrap: status } = require('@adobe/helix-status');
const { epsagon } = require('@adobe/helix-epsagon');
const { wrap: helixStatus } = require('@adobe/helix-status');
const embed = require('./embed');
const { loadquerystring } = require('./querybuilder/url');
const { createfilter } = require('./querybuilder/filter');
const dataSource = require('./data-source.js');

const MAX_DATA_SIZE = 750000;

async function main(params) {
async function main(req, context) {
/* istanbul ignore next */
const { __ow_logger: log = console } = params;
const url = dataSource((params));
const { log = console } = context;
const url = dataSource(req, context);
if (!url) {
return {
statusCode: 400,
body: 'Expecting a datasource',
};
return new Response('Expecting a datasource', {
status: 400,
});
}
log.info(`data-embed for datasource ${url}`);
const qbquery = loadquerystring(params.__ow_query, 'hlx_');
log.debug('QB query', qbquery);
const filter = createfilter(qbquery);
log.debug('QB filter', filter);
const result = await embed(url, params, log);
try {
const { searchParams } = new URL(req.url);

const { body } = result;
delete result.body;
log.debug('result', result);
log.debug(`result body size: ${JSON.stringify(body).length}`);
const filtered = filter(body);
let size = JSON.stringify(filtered).length;
log.info(`filtered result ${filtered.length} rows. size: ${size}`);
if (size > MAX_DATA_SIZE) {
// todo: could be optimized to be more accurate using some binary search approach
const avgRowSize = size / filtered.length;
const retain = Math.floor(MAX_DATA_SIZE / avgRowSize);
filtered.splice(retain, filtered.length - retain);
size = JSON.stringify(filtered).length;
log.info(`result truncated to ${filtered.length} rows. size: ${size}`);
}
return {
...result,
body: {
log.info(`data-embed for datasource ${url}`);
const qbquery = loadquerystring(searchParams, 'hlx_');
log.debug('QB query', qbquery);
const filter = createfilter(qbquery);
log.debug('QB filter', filter);
const params = Array.from(searchParams.entries()).reduce((p, [key, value]) => {
// eslint-disable-next-line no-param-reassign
p[key] = value;
return p;
}, {});
const result = await embed(url, params, context.env, log);

const {
body,
statusCode: status,
headers,
} = result;
log.debug(`result body size: ${JSON.stringify(body).length}`);
const filtered = filter(body);
let size = JSON.stringify(filtered).length;
log.info(`filtered result ${filtered.length} rows. size: ${size}`);
if (size > MAX_DATA_SIZE) {
// todo: could be optimized to be more accurate using some binary search approach
const avgRowSize = size / filtered.length;
const retain = Math.floor(MAX_DATA_SIZE / avgRowSize);
filtered.splice(retain, filtered.length - retain);
size = JSON.stringify(filtered).length;
log.info(`result truncated to ${filtered.length} rows. size: ${size}`);
}
const bodyText = JSON.stringify({
total: body.length,
offset: filter.offset || 0,
limit: filtered.length,
data: filtered,
},
};
});
return new Response(bodyText, {
status,
headers,
});
} catch (e) {
log.error('error fetching data', e);
return new Response('error fetching data', {
status: 500,
headers: {
'content-type': 'application/json',
'cache-control': 'no-store, private, must-revalidate',
},
});
}
}

module.exports.main = wrap(main)
.with(epsagon)
.with(status)
.with(helixStatus)
.with(logger.trace)
.with(logger);
10 changes: 6 additions & 4 deletions src/matchers/excel.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
*/
const { OneDrive } = require('@adobe/helix-onedrive-support');

async function extract(url, params, log = console) {
async function extract(url, params, env, log = console) {
const {
AZURE_WORD2MD_CLIENT_ID: clientId,
AZURE_HELIX_USER: username,
AZURE_HELIX_PASSWORD: password,
sheet,
table,
} = params;
const {
AZURE_WORD2MD_CLIENT_ID: clientId,
AZURE_HELIX_USER: username,
AZURE_HELIX_PASSWORD: password,
} = env;

try {
const drive = new OneDrive({
Expand Down
8 changes: 5 additions & 3 deletions src/matchers/google.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ function createOAuthClient(options, creds) {
return oAuth2Client;
}

async function extract(url, params, log = console) {
async function extract(url, params, env, log = console) {
const {
sheet,
} = params;
const {
GOOGLE_DOCS2MD_CLIENT_ID: clientId,
GOOGLE_DOCS2MD_CLIENT_SECRET: clientSecret,
GOOGLE_DOCS2MD_REFRESH_TOKEN: refresh_token,
sheet,
} = params;
} = env;

try {
const spreadsheetId = getId(url);
Expand Down
2 changes: 1 addition & 1 deletion src/matchers/run-query.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const { fetch } = require('@adobe/helix-fetch').context({
});
const { utils } = require('@adobe/helix-shared');

async function extract(url, params, log = console) {
async function extract(url, params, env, log = console) {
const host = 'https://adobeioruntime.net';
const path = '/api/v1/web/helix/helix-services/run-query@v2/';
const query = url.toString().split('/').pop();
Expand Down
13 changes: 9 additions & 4 deletions src/querybuilder/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,11 @@ function nest(obj) {
return transformconjunctions(root);
}

function cast(obj) {
return Object.entries(obj).reduce((o, [k, v]) => {
function cast(searchParams) {
const entries = searchParams instanceof URLSearchParams
? Array.from(searchParams.entries())
: Object.entries(searchParams);
return entries.reduce((o, [k, v]) => {
if (v === 'true' || v === 'false') {
o[k] = (v === 'true');
} else if (!Number.isNaN(Number.parseFloat(v))) {
Expand All @@ -190,8 +193,10 @@ function cast(obj) {
}, {});
}

function loadquerystring(str, prefix = '') {
const obj = cast(parse(str));
function loadquerystring(searchParams, prefix = '') {
const obj = searchParams instanceof URLSearchParams
? cast(searchParams)
: cast(parse(searchParams));

return nest(Object.entries(obj).reduce((o, [k, v]) => {
if (k.startsWith(prefix)) {
Expand Down
Loading