diff --git a/README.md b/README.md index 54d2b544..6a08bdcf 100644 --- a/README.md +++ b/README.md @@ -175,12 +175,7 @@ $ docker-compose exec api node_modules/.bin/babel-node src/scripts/cleanupUrls.j $ node_modules/.bin/babel-node src/scripts/fetchStatsFromGA.js ``` -- To fetch stats for a certain date range, run the above command with -```--startDate=YYYY-MM-DD --endDate=YYYY-MM-DD``` - -- If the script is ran for the first time, run the above command with -```--loadScript``` - +- For more options, run the above script with `--help` or see the file level comments. ## Troubleshooting diff --git a/src/scripts/__tests__/__snapshots__/fetchStatsFromGA.js.snap b/src/scripts/__tests__/__snapshots__/fetchStatsFromGA.js.snap index 54cbdbab..9b1a066b 100644 --- a/src/scripts/__tests__/__snapshots__/fetchStatsFromGA.js.snap +++ b/src/scripts/__tests__/__snapshots__/fetchStatsFromGA.js.snap @@ -1,6 +1,6 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_article_isCron=false 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_article_useContentGroup=false 1`] = ` Object { "dateRanges": Array [ Object { @@ -40,7 +40,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_article_isCron=true 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_article_useContentGroup=true 1`] = ` Object { "dateRanges": Array [ Object { @@ -80,7 +80,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_reply_isCron=false 1`] = ` +exports[`fetchStatsFromGA helper functions 
requestBodyBuilder should return right request body for different source and doc types: LINE_reply_useContentGroup=false 1`] = ` Object { "dateRanges": Array [ Object { @@ -120,7 +120,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_reply_isCron=true 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: LINE_reply_useContentGroup=true 1`] = ` Object { "dateRanges": Array [ Object { @@ -160,7 +160,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_article_isCron=false 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_article_useContentGroup=false 1`] = ` Object { "dateRanges": Array [ Object { @@ -200,7 +200,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_article_isCron=true 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_article_useContentGroup=true 1`] = ` Object { "dateRanges": Array [ Object { @@ -240,7 +240,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_reply_isCron=false 1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_reply_useContentGroup=false 1`] = ` Object { "dateRanges": Array [ Object { @@ -280,7 +280,7 @@ Object { } `; -exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_reply_isCron=true 
1`] = ` +exports[`fetchStatsFromGA helper functions requestBodyBuilder should return right request body for different source and doc types: WEB_reply_useContentGroup=true 1`] = ` Object { "dateRanges": Array [ Object { @@ -877,16 +877,12 @@ Array [ Array [ "WEB", undefined, - Object { - "isCron": true, - }, + undefined, ], Array [ "LINE", undefined, - Object { - "isCron": true, - }, + undefined, ], ] `; @@ -898,8 +894,8 @@ Array [ undefined, Object { "endDate": "today", - "isCron": false, "startDate": "2020-07-10", + "useContentGroup": false, }, ], Array [ @@ -907,8 +903,8 @@ Array [ undefined, Object { "endDate": "today", - "isCron": false, "startDate": "2020-07-10", + "useContentGroup": false, }, ], ] @@ -960,9 +956,7 @@ Array [ Array [ "WEB", undefined, - Object { - "isCron": true, - }, + undefined, ], Array [ "WEB", @@ -970,9 +964,7 @@ Array [ "article": 100, "reply": 100, }, - Object { - "isCron": true, - }, + undefined, ], Array [ "WEB", @@ -980,16 +972,12 @@ Array [ "article": 200, "reply": -1, }, - Object { - "isCron": true, - }, + undefined, ], Array [ "LINE", undefined, - Object { - "isCron": true, - }, + undefined, ], Array [ "LINE", @@ -997,9 +985,7 @@ Array [ "article": 100, "reply": 100, }, - Object { - "isCron": true, - }, + undefined, ], Array [ "LINE", @@ -1007,9 +993,7 @@ Array [ "article": -1, "reply": 200, }, - Object { - "isCron": true, - }, + undefined, ], ] `; diff --git a/src/scripts/__tests__/fetchStatsFromGA.js b/src/scripts/__tests__/fetchStatsFromGA.js index 10202ff6..032eb237 100644 --- a/src/scripts/__tests__/fetchStatsFromGA.js +++ b/src/scripts/__tests__/fetchStatsFromGA.js @@ -62,9 +62,14 @@ describe('fetchStatsFromGA', () => { }); it('without any arugments', async () => { - yargs.argvMock.mockReturnValue({}); + yargs.argvMock.mockReturnValue({ + useContentGroup: true, + loadScript: false, + }); await fetchStatsFromGA.main(); - expect(updateStatsMock.mock.calls).toMatchObject([[{ isCron: true }]]); + 
expect(updateStatsMock.mock.calls).toMatchObject([ + [{ useContentGroup: true }], + ]); expect(storeScriptInDBMock).not.toHaveBeenCalled(); }); @@ -72,18 +77,31 @@ describe('fetchStatsFromGA', () => { yargs.argvMock.mockReturnValue({ startDate: '2020-01-01', endDate: '2020-02-01', + useContentGroup: true, + loadScript: false, }); await fetchStatsFromGA.main(); expect(updateStatsMock.mock.calls).toMatchObject([ - [{ isCron: false, startDate: '2020-01-01', endDate: '2020-02-01' }], + [ + { + useContentGroup: true, + startDate: '2020-01-01', + endDate: '2020-02-01', + }, + ], ]); expect(storeScriptInDBMock).not.toHaveBeenCalled(); }); it('with loadScript arugments', async () => { - yargs.argvMock.mockReturnValue({ loadScript: true }); + yargs.argvMock.mockReturnValue({ + loadScript: true, + useContentGroup: true, + }); await fetchStatsFromGA.main(); - expect(updateStatsMock.mock.calls).toMatchObject([[{ isCron: true }]]); + expect(updateStatsMock.mock.calls).toMatchObject([ + [{ useContentGroup: true }], + ]); expect(storeScriptInDBMock).toHaveBeenCalled(); }); @@ -92,10 +110,17 @@ describe('fetchStatsFromGA', () => { loadScript: true, startDate: '2020-01-01', endDate: '2020-02-01', + useContentGroup: true, }); await fetchStatsFromGA.main(); expect(updateStatsMock.mock.calls).toMatchObject([ - [{ isCron: false, startDate: '2020-01-01', endDate: '2020-02-01' }], + [ + { + useContentGroup: true, + startDate: '2020-01-01', + endDate: '2020-02-01', + }, + ], ]); expect(storeScriptInDBMock).toHaveBeenCalled(); }); @@ -122,11 +147,11 @@ describe('fetchStatsFromGA', () => { allSourceTypes.forEach(sourceType => allDocTypes.forEach(docType => - [true, false].forEach(isCron => { + [true, false].forEach(useContentGroup => { const params = { - isCron, - startDate: isCron ? undefined : '2020-07-10', - endDate: isCron ? undefined : 'today', + useContentGroup, + startDate: useContentGroup ? undefined : '2020-07-10', + endDate: useContentGroup ? 
undefined : 'today', }; expect( fetchStatsFromGA.requestBodyBuilder( @@ -135,44 +160,14 @@ describe('fetchStatsFromGA', () => { '', params ) - ).toMatchSnapshot(`${sourceType}_${docType}_isCron=${isCron}`); + ).toMatchSnapshot( + `${sourceType}_${docType}_useContentGroup=${useContentGroup}` + ); }) ) ); }); - it('processCommandLineArgs should return proper params', () => { - const processArgs = fetchStatsFromGA.processCommandLineArgs; - expect(processArgs({})).toStrictEqual({ isCron: true }); - expect( - processArgs({ startDate: '2020-07-01', endDate: '2020-07-15' }) - ).toStrictEqual({ - isCron: false, - startDate: '2020-07-01', - endDate: '2020-07-15', - }); - expect( - processArgs({ startDate: '2020-07-01', endDate: '2020-07-01' }) - ).toStrictEqual({ - isCron: false, - startDate: '2020-07-01', - endDate: '2020-07-01', - }); - }); - - it('processCommandLineArgs should raise errors when given invalid arugments', () => { - [ - { startDate: '2020-01-01' }, - { startDate: '2020-01-01', endDate: '2019-01-01' }, - { startDate: '2019-01-01', endDate: '2020-01-01' }, - { startDate: '3000-01-01', endDate: '3000-01-01' }, - ].forEach(dateRange => { - expect(() => - fetchStatsFromGA.processCommandLineArgs(dateRange) - ).toThrow(); - }); - }); - it('storeScriptInDB should store upsert script in db', async () => { await fetchStatsFromGA.storeScriptInDB(); expect( @@ -215,7 +210,7 @@ describe('fetchStatsFromGA', () => { fetchReportsMock.mockClear(); await fetchStatsFromGA.updateStats({ - isCron: false, + useContentGroup: false, startDate: '2020-07-10', endDate: 'today', }); @@ -291,7 +286,7 @@ describe('fetchStatsFromGA', () => { const fetchReportsResults = await fetchStatsFromGA.fetchReports( 'WEB', {}, - { isCron: true } + { useContentGroup: true } ); expect(fetchReportsResults).toStrictEqual({ @@ -303,7 +298,7 @@ describe('fetchStatsFromGA', () => { it('should send approate batchGet requests and return correct curated results', async () => { const sourceType = 'WEB', - 
+ params = { useContentGroup: true }; let requestBuilderCalledTimes = 1; for (const pageTokens of fixtures.fetchReports.allPossiblePageTokens) { diff --git a/src/scripts/fetchStatsFromGA.js b/src/scripts/fetchStatsFromGA.js index 52d0ec9f..b42c51d3 100644 --- a/src/scripts/fetchStatsFromGA.js +++ b/src/scripts/fetchStatsFromGA.js @@ -1,18 +1,30 @@ // eslint-disable no-console -// TODO: consider the edge case when the cron job runs at midnight -// the first cron job of the day should also update the value for yesterday +/* + A script that fetches user activity stats between `startDate` and `endDate` from GA. + + - Default values for `startDate` and `endDate` are the current date (in GMT+8); + they can be set by command line arguments. Dates should be in the format of + YYYY-MM-DD, or see https://developers.google.com/analytics/devguides/reporting/core/v3/reference#startDate + for relative date patterns. + + - All update operations in db are handled by the script with id `analyticsUpsertScript`; + if `analyticsUpsertScript` is not in db yet, run with `--loadScript` to save + the script to db. + + - A content group that extracts docId from URL should be set by admin in GA + as the first content group. Because content group is not retroactive, to + fetch data without content group, run with `--useContentGroup=false`. + The script then uses pagePathLevel2 as the primary dimension and extracts docId from there.
+*/ import 'dotenv/config'; import client from 'util/client'; import rollbar from '../rollbarInstance'; import { google } from 'googleapis'; -import { assertDateRange } from 'util/date'; import yargs from 'yargs'; const analyticsreporting = google.analyticsreporting('v4'); -const maxDuration = 90 * 24 * 60 * 60 * 1000; - const pageSize = process.env.GA_PAGE_SIZE || '10000'; const webViewId = process.env.GA_WEB_VIEW_ID; const lineViewId = process.env.GA_LINE_VIEW_ID; @@ -34,8 +46,8 @@ const statsSources = { WEB: { filtersExpression: docType => `ga:pagePathLevel1==/${docType}/`, name: 'WEB', - primaryDimension: (isCronjob = true) => - isCronjob ? 'ga:contentGroup1' : 'ga:pagePathLevel2', + primaryDimension: (useContentGroup = true) => + useContentGroup ? 'ga:contentGroup1' : 'ga:pagePathLevel2', primaryMetric: 'ga:pageviews', viewId: webViewId, }, @@ -89,7 +101,11 @@ const parseIdFromRow = function(row) { // Contructs request body for google reporting API. const requestBodyBuilder = function(sourceType, docType, pageToken, params) { - const { isCron, startDate = 'today', endDate = 'today' } = params; + const { + useContentGroup = true, + startDate = 'today', + endDate = 'today', + } = params; let { filtersExpression, primaryDimension, @@ -98,13 +114,16 @@ const requestBodyBuilder = function(sourceType, docType, pageToken, params) { } = statsSources[sourceType]; return { dateRanges: [{ startDate, endDate }], - dimensions: [{ name: primaryDimension(isCron) }, { name: 'ga:date' }], + dimensions: [ + { name: primaryDimension(useContentGroup) }, + { name: 'ga:date' }, + ], filtersExpression: filtersExpression(docType), includeEmptyRows: false, metrics: [{ expression: primaryMetric }, { expression: 'ga:users' }], orderBys: [ { fieldName: 'ga:date' }, - { fieldName: primaryDimension(isCron) }, + { fieldName: primaryDimension(useContentGroup) }, ], pageSize, pageToken, @@ -112,24 +131,13 @@ const requestBodyBuilder = function(sourceType, docType, pageToken, params) { }; }; 
-const processCommandLineArgs = args => { - const { startDate, endDate } = args; - if (!startDate && !endDate) { - return { isCron: true }; - } - - assertDateRange(startDate, endDate, maxDuration); - - return { isCron: false, startDate, endDate }; -}; - /** * Given a sourceType, fetch stats for all doc types from startDate to endDate (inclusive). * @param {string} sourceType * @param {object} [pageTokens={}] Mapping of each doc type to its page token * @param {object} params Object of the from: - {isCron: [bool=true], [startDate: string], [endDate: string]} + {useContentGroup: [bool=true], [startDate: string], [endDate: string]} * @return { results: {object} a mapping of doc type to its result, @@ -329,9 +337,9 @@ const processReport = async function( * Fetch GA stats for given time period and store in db. * * @param {object} params Object of the from: - {isCron: [bool=true], [startDate: string], [endDate: string]} + {useContentGroup: [bool=true], [startDate: string], [endDate: string]} */ -const updateStats = async function(params = { isCron: true }) { +const updateStats = async function(params) { for (const sourceType of allSourceTypes) { let results, pageTokens, @@ -370,22 +378,33 @@ async function main() { .options({ startDate: { alias: 's', - description: 'start date in the format of YYYY-MM-DD', + description: + 'start date in the format of YYYY-MM-DD or see https://developers.google.com/analytics/devguides/reporting/core/v3/reference#startDate for accepted patterns for relative dates', type: 'string', }, endDate: { alias: 'e', - description: 'end date in the format of YYYY-MM-DD', + description: + 'end date in the format of YYYY-MM-DD or see https://developers.google.com/analytics/devguides/reporting/core/v3/reference#endDate for accepted patterns for relative dates', type: 'string', }, loadScript: { default: false, description: 'whether to store upsert script in db', }, + useContentGroup: { + default: true, + description: + 'wheter to use ga:contentGroup1 
as a dimension for web stats', + }, }) .help('help').argv; - const params = processCommandLineArgs(argv); + const params = { + startDate: argv.startDate, + endDate: argv.endDate, + useContentGroup: argv.useContentGroup, + }; if (argv.loadScript) { await storeScriptInDB(); @@ -409,7 +428,6 @@ export default { fetchReports, main, parseIdFromRow, - processCommandLineArgs, processReport, requestBodyBuilder, statsSources,