Skip to content

Commit

Permalink
Only request diffs for types we can diff
Browse files Browse the repository at this point in the history
This got much more complex than anticipated! There are a few major bits here:

- We now have a `MediaType` type for modeling and comparing different media types. It includes a concept of canonical types, so `parseMediaType('text/html').equals(parseMediaType('application/xhtml')) === true`. If we can reliably clean this up in the DB, we can get rid of that, but it's useful for now.
  Similarly, we can also get media types for file extensions (it only supports a minimal list of extensions we know we have in the DB). Once the DB has content types for all versions, we can get rid of that, too.

- The `diffTypes` module now has a `diffTypesFor(mediaType)` method, allowing us to get the diffs that are appropriate to present for a given type of content. We've also added three "raw" diff types which indicate we should simply show the content in an `<iframe>` because we don't know how to diff it (or if the two versions in question have differing content types, which often happens when a PDF or image responded with an HTML error page [like a 404] for a given version).

- The `SelectDiffType` form control now takes a list of diff types (from the above `diffTypesFor()`) to show.

- There's a little extra complexity in retrieving an actual diff where we check to see if the diff type has an actual diff service associated with it ("raw" types don't, because they represent viewing the actual content, not a diff). We request the actual raw content there because we need to sniff it for HTML (when we present HTML, we need to set a `<base>` tag so associated images, styles, and scripts work). This part is definitely a little janky, but I don't see a clear improvement without a lot more other changes. It slightly exacerbates the existing race condition issue. The sniffing HTML situation should/could probably be fixed alongside edgi-govdata-archiving/web-monitoring#92, where we basically need some kind of proxy or pre-cleaned-up version to render.

- Finally, this makes a slight tweak to the 'no changes' message; it shows a message indicating two versions are exactly the same if their hashes are identical, but shows the old message (with slightly more detail) if the hashes are different but `change_count` is 0.

Fixes #179.
  • Loading branch information
Mr0grog committed Feb 5, 2018
1 parent 08c3f54 commit 4efa529
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 12 deletions.
44 changes: 43 additions & 1 deletion src/components/change-view.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import SelectDiffType from './select-diff-type';
import SelectVersion from './select-version';
import Loading from './loading';
import VersionistaInfo from './versionista-info';
import {diffTypesFor} from '../constants/diff-types';
import {
htmlType,
mediaTypeForExtension,
parseMediaType,
unknownType
} from '../scripts/media-type';

const collapsedViewStorage = 'WebMonitoring.ChangeView.collapsedView';

Expand Down Expand Up @@ -46,6 +53,10 @@ export default class ChangeView extends React.Component {
const page = this.props.page;
if (page.versions && page.versions.length > 1) {
this.state.diffType = 'SIDE_BY_SIDE_RENDERED';
const relevantTypes = relevantDiffTypes(this.props.from, this.props.to);
if (!relevantTypes.find(type => type.value === this.state.diffType)) {
this.state.diffType = relevantTypes[0].value;
}
}

if ('sessionStorage' in window) {
Expand Down Expand Up @@ -130,7 +141,11 @@ export default class ChangeView extends React.Component {
</label>
<label className="version-selector__item">
<span>Comparison:</span>
<SelectDiffType value={this.state.diffType} onChange={this.handleDiffTypeChange} />
<SelectDiffType
types={relevantDiffTypes(this.props.from, this.props.to)}
value={this.state.diffType}
onChange={this.handleDiffTypeChange}
/>
</label>

<label className="version-selector__item">
Expand Down Expand Up @@ -341,3 +356,30 @@ function changeMatches (change, other) {
function isDisabled (element) {
return element.disabled || element.classList.contains('disabled');
}

function relevantDiffTypes (versionA, versionB) {
let typeA = mediaTypeForVersion(versionA);

if (typeA.equals(mediaTypeForVersion(versionB))) {
return diffTypesFor(typeA);
}

// If we have differing types of content consider it an 'unkown' type.
return diffTypesFor(unknownType);
}

function mediaTypeForVersion (version) {
const contentType = version.content_type
|| version.source_metadata.content_type;

if (contentType) {
return parseMediaType(contentType);
}

if (version.uri) {
const extension = version.uri.match(/^([^:]+:\/\/)?.*\/[^/]*(\.[^/]+)$/);
return mediaTypeForExtension[extension && extension[2]] || htmlType;
}

return htmlType;
}
62 changes: 56 additions & 6 deletions src/components/diff-view.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import HighlightedTextDiff from './highlighted-text-diff';
import InlineRenderedDiff from './inline-rendered-diff';
import SideBySideRenderedDiff from './side-by-side-rendered-diff';
import ChangesOnlyDiff from './changes-only-diff';
import RawVersion from './raw-version';
import SideBySideRawVersions from './side-by-side-raw-versions';

/**
* @typedef DiffViewProps
Expand Down Expand Up @@ -60,27 +62,63 @@ export default class DiffView extends React.Component {

return (
<div className="diff-view">
{this.renderNoChangeMessage()}
{this.renderNoChangeMessage() || this.renderUndiffableMessage()}
{this.renderDiff()}
</div>
);
}

renderNoChangeMessage () {
if (this.state.diffData.change_count === 0) {
return <div className="diff-view__alert alert alert-warning">
There were NO changes for this diff type.</div>;
const sameContent = this.props.a
&& this.props.b
&& this.props.a.version_hash === this.props.b.version_hash;

const className = 'diff-view__alert alert alert-warning';

if (sameContent) {
return <div className={className}>
These two versions are <strong>exactly the same</strong>.
</div>;
}
else {
return null;
else if (this.state.diffData.change_count === 0) {
return <div className={className}>
There were <strong>no changes for this diff type</strong>. (Other diff
types may show changes.)
</div>;
}

return null;
}

renderUndiffableMessage () {
if (this.state.diffData.raw) {
return (
<div className="diff-view__alert alert alert-info">
We can’t compare the selected versions of page; you are viewing the
content without deletions and insertions highlighted.
</div>
);
}
return null;
}

renderDiff () {
// TODO: if we have multiple ways to render content from a single service
// in the future (e.g. inline vs. side-by-side text), we need a better
// way to ensure we use the correct rendering and avoid race conditions
switch (this.props.diffType) {
case diffTypes.RAW_SIDE_BY_SIDE.value:
return (
<SideBySideRawVersions page={this.props.page} a={this.props.a} b={this.props.b} diffData={this.state.diffData} />
);
case diffTypes.RAW_FROM_CONTENT.value:
return (
<RawVersion page={this.props.page} version={this.props.a} content={this.state.diffData.rawA} />
);
case diffTypes.RAW_TO_CONTENT.value:
return (
<RawVersion page={this.props.page} version={this.props.b} content={this.state.diffData.rawB} />
);
case diffTypes.HIGHLIGHTED_RENDERED.value:
return (
<InlineRenderedDiff diffData={this.state.diffData} page={this.props.page} />
Expand Down Expand Up @@ -146,6 +184,18 @@ export default class DiffView extends React.Component {
// (page: Page) => page.uuid === pageId);
// Promise.resolve(fromList || this.context.api.getDiff(pageId, aId, bId, changeDiffTypes[diffType]))
this.setState({diffData: null});
if (!diffTypes[diffType].diffService) {
return Promise.all([
fetch(this.props.a.uri, {mode: 'cors'}),
fetch(this.props.b.uri, {mode: 'cors'})
])
.then(([rawA, rawB]) => {
return {raw: true, rawA, rawB};
})
.catch(error => error)
.then(data => this.setState({diffData: data}));
}

this.context.api.getDiff(pageId, aId, bId, diffTypes[diffType].diffService, diffTypes[diffType].options)
.catch(error => {
return error;
Expand Down
34 changes: 34 additions & 0 deletions src/components/raw-version.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import React from 'react';
import SandboxedHtml from './sandboxed-html';

/**
* @typedef {Object} RawVersionProps
* @property {Page} page The page this diff pertains to
* @property {Version} version
* @property {string} content
*/

/**
* Display the raw content of a version.
*
* @class RawVersion
* @extends {React.Component}
* @param {RawVersionProps} props
*/
export default class RawVersion extends React.Component {
render () {
if (this.props.content && /^[\s\n\r]*</.test(this.props.content)) {
return (
<div className="inline-render">
<SandboxedHtml html={this.props.content} baseUrl={this.props.page.url} />
</div>
);
}

return (
<div className="inline-render">
<iframe src={this.props.version.uri} />
</div>
);
}
}
11 changes: 6 additions & 5 deletions src/components/select-diff-type.jsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import React from 'react';
import {diffTypes} from '../constants/diff-types';

/**
* A dropdown select box for types of diffs
Expand All @@ -10,6 +9,7 @@ import {diffTypes} from '../constants/diff-types';
* @param {string} value Identifier for the selected diff type
* @param {Function} onChange Callback when a new value is selected. Signature:
* `string => void`
* @param {DiffType[]} types
*/
export default class SelectDiffType extends React.Component {
render () {
Expand All @@ -20,11 +20,12 @@ export default class SelectDiffType extends React.Component {
return (
<select value={this.props.value} onChange={handleChange}>
<option value="">none</option>
{Object.keys(diffTypes).map(key => {
const diffType = diffTypes[key];
return <option key={diffType.value} value={diffType.value}>{diffType.description}</option>;
})}
{this.props.types.map(this._renderOption)}
</select>
);
}

_renderOption (diffType) {
return <option key={diffType.value} value={diffType.value}>{diffType.description}</option>;
}
}
36 changes: 36 additions & 0 deletions src/components/side-by-side-raw-versions.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import React from 'react';
import SandboxedHtml from './sandboxed-html';

/**
* @typedef {Object} SideBySideRawVersionsProps
* @property {DiffData} diffData Object containing diff to render and its metadata
* @property {Page} page The page this diff pertains to
* @property {Version} a
* @property {Version} b
*/

/**
* Display two versions of a page, side-by-side.
*
* @class SideBySideRawVersions
* @extends {React.Component}
* @param {SideBySideRawVersionsProps} props
*/
export default class SideBySideRawVersions extends React.Component {
render () {
return (
<div className="side-by-side-render">
{renderVersion(this.props.page, this.props.a, this.props.diffData.rawA)}
{renderVersion(this.props.page, this.props.b, this.props.diffData.rawB)}
</div>
);
}
}

function renderVersion (page, version, content) {
if (content && /^[\s\n\r]*</.test(content)) {
return <SandboxedHtml html={contentcontent} baseUrl={page.url} />;
}

return <iframe src={version.uri} />;
}
59 changes: 59 additions & 0 deletions src/constants/diff-types.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
import {
mediaTypeForExtension,
parseMediaType,
unknownType
} from '../scripts/media-type';

export const diffTypes = {
RAW_FROM_CONTENT: {
description: '“From” Version',
},
RAW_TO_CONTENT: {
description: '“To” Version',
},
RAW_SIDE_BY_SIDE: {
description: 'Side-by-side Content',
},
HIGHLIGHTED_TEXT: {
description: 'Highlighted Text',
diffService: 'html_text_dmp',
Expand Down Expand Up @@ -33,3 +48,47 @@ export const diffTypes = {
for (let key in diffTypes) {
diffTypes[key].value = key;
}

const diffTypesByMediaType = {
'text/html': [
diffTypes.HIGHLIGHTED_TEXT,
diffTypes.HIGHLIGHTED_SOURCE,
diffTypes.HIGHLIGHTED_RENDERED,
diffTypes.SIDE_BY_SIDE_RENDERED,
diffTypes.OUTGOING_LINKS,
diffTypes.CHANGES_ONLY_TEXT,
diffTypes.CHANGES_ONLY_SOURCE,
],

'text/*': [
diffTypes.HIGHLIGHTED_SOURCE,
diffTypes.CHANGES_ONLY_SOURCE,
],

'*/*': [
diffTypes.RAW_SIDE_BY_SIDE,
diffTypes.RAW_FROM_CONTENT,
diffTypes.RAW_TO_CONTENT,
],
};

/**
* Get appropriate diff types for a given kind of content.
* @param {string|MediaType} mediaType The type of content to get. Can be a
* MediaType object, a content type/media type string, or a file extension.
* @returns {Array<DiffType>}
*/
export function diffTypesFor (mediaType) {
let type = null;
if (typeof mediaType === 'string' && mediaType.startsWith('.')) {
type = mediaTypeForExtension(mediaType) || unknownType;
}
else {
type = parseMediaType(mediaType);
}

return diffTypesByMediaType[type.mediaType]
|| diffTypesByMediaType[type.genericType]
|| diffTypesByMediaType[unknownType.mediaType]
|| [];
}

0 comments on commit 4efa529

Please sign in to comment.