Skip to content

Commit

Permalink
Merge pull request #155 from harvard-lil/summary-feature-refactoring
Browse files Browse the repository at this point in the history
Refactoring: "summary" and "provenanceInfo" features
  • Loading branch information
matteocargnelutti committed May 16, 2023
2 parents deabaab + 800ec47 commit 60e86f4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
27 changes: 13 additions & 14 deletions Scoop.js
Original file line number Diff line number Diff line change
Expand Up @@ -1387,13 +1387,8 @@ export class Scoop {
* @property {boolean} targetUrlIsWebPage
* @property {string} targetUrlContentType
* @property {ScoopOptions} options
* @property {string} startedAt - ISO-formated date
* @property {string[]} blockedRequests
* @property {string[]} noArchiveUrls
* @property {?string} captureIp
* @property {?string} userAgent
* @property {string[]} exchangeUrls
* @property {object} attachments
* @property {string} startedAt - ISO-formatted date
* @property {object} attachments - Summary of generated exchange filenames.
* @property {?string} attachments.provenanceSummary - Filename
* @property {?string} attachments.screenshot - Filename
* @property {?string} attachments.pdfSnapshot - Filename
Expand All @@ -1403,10 +1398,11 @@ export class Scoop {
* @property {?string[]} attachments.videoExtracted - Filenames
* @property {?string[]} attachments.videoExtractedSubtitles - Filenames
* @property {?string[]} attachments.certificates - Filenames
* @property {?object} provenanceInfo - See {@link Scoop.provenanceInfo}. Only populated if the "provenanceSummary" option was turned on.
*/

/**
* Generates and returns a summary of the current capture object, regardless of its state.
* Generates and returns a summary of the current capture, regardless of its state.
* @returns {Promise<ScoopCaptureSummary>}
*/
async summary () {
Expand All @@ -1416,14 +1412,17 @@ export class Scoop {
targetUrl: this.url,
targetUrlIsWebPage: this.targetUrlIsWebPage,
targetUrlContentType: this.targetUrlContentType,
options: this.options,
startedAt: this.startedAt,
blockedRequests: [],
noArchiveUrls: [],
captureIp: this.provenanceInfo?.captureIp,
userAgent: this.provenanceInfo?.userAgent,
options: this.options,
exchangeUrls: this.exchanges.map(exchange => exchange.url),
attachments: {}
attachments: {},
provenanceInfo: this.options.provenanceSummary ? this.provenanceInfo : {}
// NOTE:
// `provenanceInfo` also contains an `options` object, but some of its properties
// have been altered because it is meant to be embedded in a WACZ
// (for example, paths are replaced with hashes).
// Both `options` objects are therefore kept on purpose:
// `provenanceInfo.options` differs from `options` and is specific to that context.
}

//
Expand Down
10 changes: 6 additions & 4 deletions exporters/scoopToWACZ.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,14 @@ export async function scoopToWACZ (capture, includeRaw = false, signingServer) {
description: capture.pageInfo?.description
? capture.pageInfo.description
: `Captured by Scoop on ${capture.startedAt.toISOString()}`,
// Optional: signing url / token, provenance info
// Optional: signing url / token, capture state, provenance info
signingUrl: signingServer?.url,
signingToken: signingServer?.token,
datapackageExtras: capture.options.provenanceSummary
? { provenanceInfo: capture.provenanceInfo }
: null
datapackageExtras: {
state: capture.state,
states: Object.keys(Scoop.states),
provenanceInfo: capture.options.provenanceSummary ? capture.provenanceInfo : null
}
})
} catch (err) {
capture.log.trace(err)
Expand Down

0 comments on commit 60e86f4

Please sign in to comment.