diff --git a/reverse_engineering/helpers/postgresService.js b/reverse_engineering/helpers/postgresService.js index c0ba570..284d104 100644 --- a/reverse_engineering/helpers/postgresService.js +++ b/reverse_engineering/helpers/postgresService.js @@ -287,6 +287,12 @@ module.exports = { const tableForeignKeys = await db.queryTolerant(queryConstants.GET_TABLE_FOREIGN_KEYS, [tableOid]); const triggers = await this._getTriggers(schemaName, tableName, schemaOid, tableOid, ignoreUdfUdpTriggers); + logger.info('Table data retrieved', { + schemaName, + tableName, + columnTypes: tableColumns.map(column => column.data_type), + }); + const partitioning = prepareTablePartition(partitionResult, tableColumns); const tableLevelProperties = prepareTableLevelData(tableLevelData, tableToastOptions); const description = getDescriptionFromResult(descriptionResult); @@ -352,7 +358,7 @@ module.exports = { tableOid, ]); - return _.map(tableColumns, (columnData, index) => { + return _.map(tableColumns, columnData => { return { ...columnData, ...(_.find(tableColumnsAdditionalData, { name: columnData.column_name }) || {}), @@ -368,13 +374,23 @@ module.exports = { (await db.queryTolerant(queryConstants.GET_ROWS_COUNT(fullTableName), [], true))?.quantity || 0; const limit = getLimit(quantity, recordSamplingSettings); - const jsonColumns = _.chain(attributes) - .filter(({ type }) => _.includes(['json', 'jsonb'], type)) - .map('name') - .join(', ') - .value(); + const jsonColumns = attributes.filter(({ type }) => _.includes(['json', 'jsonb'], type)); + + const jsonColumnsString = _.map(jsonColumns, 'name').join(', '); + + const samplingDataSize = await db.queryTolerant( + queryConstants.GET_SAMPLED_DATA_SIZE(fullTableName, jsonColumnsString), + [limit], + true, + ); + + logger.info('Sampling table', { + tableName, + jsonColumnsNumber: jsonColumns.length, + samplingDataSize: samplingDataSize?._hackolade_tmp_sampling_tbl_size, + }); - return await db.queryTolerant(queryConstants.GET_SAMPLED_DATA(fullTableName, jsonColumns), [limit]); + return await db.queryTolerant(queryConstants.GET_SAMPLED_DATA(fullTableName, jsonColumnsString), [limit]); }, async _retrieveSingleViewData(schemaOid, schemaName, ignoreUdfUdpTriggers, viewName) { diff --git a/reverse_engineering/helpers/queryConstants.js b/reverse_engineering/helpers/queryConstants.js index 68a7a4f..d9634c9 100644 --- a/reverse_engineering/helpers/queryConstants.js +++ b/reverse_engineering/helpers/queryConstants.js @@ -154,6 +154,9 @@ const queryConstants = { GET_DESCRIPTION_BY_OID: `SELECT obj_description($1)`, GET_ROWS_COUNT: fullTableName => `SELECT COUNT(*) AS quantity FROM ${fullTableName};`, GET_SAMPLED_DATA: (fullTableName, jsonColumns) => `SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1;`, + GET_SAMPLED_DATA_SIZE: (fullTableName, jsonColumns) => ` + SELECT sum(pg_column_size(_hackolade_tmp_sampling_tbl.*)) AS _hackolade_tmp_sampling_tbl_size + FROM (SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1) AS _hackolade_tmp_sampling_tbl;`, GET_INHERITS_PARENT_TABLE_NAME: ` SELECT pc.relname AS parent_table_name FROM pg_catalog.pg_inherits AS pi INNER JOIN pg_catalog.pg_class AS pc