-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
349 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
-- Archives table: stores extracted page data (title, html content, excerpt,
-- byline, ...) for a bookmark, together with a ready flag and an error column.
CREATE TABLE archives (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  owner_id uuid NOT NULL,
  bookmark_id uuid NOT NULL,
  created_at timestamptz NOT NULL DEFAULT now(),
  updated_at timestamptz,
  url text NOT NULL,
  title text,
  site_name text,
  html_content text,
  length bigint,
  excerpt text,
  byline text,
  direction text,
  language text,
  ready boolean NOT NULL DEFAULT false,
  error text,

  -- Deleting the owning user removes their archives.
  CONSTRAINT fk_owner
    FOREIGN KEY (owner_id)
    REFERENCES users (id)
    ON DELETE CASCADE,

  -- Deleting the bookmark removes its archives.
  CONSTRAINT fk_bookmark
    FOREIGN KEY (bookmark_id)
    REFERENCES bookmarks (id)
    ON DELETE CASCADE
);

-- Indexes on both foreign key columns.
CREATE INDEX idx_archives_owner ON archives (owner_id);
CREATE INDEX idx_archives_bookmark ON archives (bookmark_id);

-- trigger_set_timestamp() is defined outside this migration; presumably it
-- maintains updated_at on every row update (confirm against its definition).
CREATE TRIGGER set_timestamp_archives
  BEFORE UPDATE ON archives
  FOR EACH ROW
  EXECUTE PROCEDURE trigger_set_timestamp();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
-- Reverts the archives migration: trigger first, then indexes, then the table.
-- The set_timestamp_archives trigger lives on the archives table, so it has to
-- be dropped from that table (not from a differently named one).
drop trigger if exists set_timestamp_archives on archives;
drop index if exists idx_archives_bookmark;
drop index if exists idx_archives_owner;
drop table if exists archives;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import fp from 'fastify-plugin' | ||
import { JSDOM } from 'jsdom' | ||
import { Readability } from '@mozilla/readability' | ||
import createDOMPurify from 'dompurify' | ||
|
||
/**
 * Fastify plugin that decorates the instance with `extractArchive`, a helper
 * that turns a page into a sanitized Readability article.
 */
export default fp(async function (fastify, opts) {
  /**
   * Extract the readable article for a URL.
   *
   * Results are looked up in, and stored to, `fastify.archiveCache` keyed by
   * `{ url }`. The whole call is timed with `fastify.metrics.archiveSeconds`.
   *
   * @param {object} params
   * @param {string} params.url - Page URL; used as cache key, fetch target and JSDOM document URL.
   * @param {string} [params.initialHTML] - Optionally pass html here if it's already fetched.
   * @returns {Promise<object>} The Readability article, with `content` sanitized by DOMPurify.
   * @throws {Error} When Readability cannot extract an article, or when fetching the HTML fails.
   */
  fastify.decorate('extractArchive', async function extractArchive ({
    url,
    initialHTML // optionally pass html here if it's already fetched before
  }) {
    const endTimer = fastify.metrics.archiveSeconds.startTimer()
    try {
      const cacheKey = { url }

      const cachedArticle = fastify.archiveCache.get(cacheKey)
      if (cachedArticle) {
        return cachedArticle
      }

      const html = initialHTML ?? await fastify.fetchHTML({ url })

      const { document } = (new JSDOM(html, { url })).window
      const article = new Readability(document).parse()

      // Readability#parse returns null when it cannot find article content;
      // fail with a clear message instead of a TypeError on `article.content`.
      if (!article) {
        throw new Error(`Unable to extract a readable article from ${url}`)
      }

      // DOMPurify needs a window object to run outside of a browser.
      const DOMPurify = createDOMPurify(new JSDOM('').window)
      article.content = DOMPurify.sanitize(article.content)

      // Write to the same cache that is read above so later calls can hit it.
      fastify.archiveCache.set(cacheKey, article)

      return article
    } finally {
      // Stop the timer on cache hit, success and failure alike.
      endTimer()
    }
  })
}, {
  name: 'extract-archive',
  dependencies: ['env', 'prom', 'cache', 'fetch-html']
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import fp from 'fastify-plugin' | ||
import { request as undiciRequest } from 'undici' | ||
|
||
// Sorry newspapers, no cheating
const GOOGLE_BOT_UA = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'

// Hostnames that are requested with the Googlebot user agent.
const uaHacks = Object.fromEntries(
  ['twitter.com', 'mobile.twitter.com'].map(hostname => [hostname, GOOGLE_BOT_UA])
)
|
||
/**
 * Fastify plugin that decorates the instance with `fetchHTML`.
 */
export default fp(async function (fastify, opts) {
  /**
   * Fetch a URL and return the response body as text.
   *
   * Follows up to 3 redirects and applies 15 second header and body timeouts.
   * Hosts listed in `uaHacks` are requested with their override user agent;
   * everything else uses `Breadcrum / <version>` from `fastify.pkg.version`.
   *
   * @param {object} params
   * @param {string} params.url - Absolute URL to fetch.
   * @returns {Promise<string>} The response body.
   * @throws {TypeError} When `url` cannot be parsed by `new URL`.
   * @throws {Error} When the final response status code is above 299.
   */
  fastify.decorate('fetchHTML', async function fetchHTML ({
    url
  }) {
    const requestURL = new URL(url)

    // Own-property check: a plain bracket lookup would also resolve inherited
    // members (e.g. hostname "constructor") and yield a non-string user agent.
    const uaOverride = Object.hasOwn(uaHacks, requestURL.hostname)
      ? uaHacks[requestURL.hostname]
      : undefined
    const ua = uaOverride ?? `Breadcrum / ${fastify.pkg.version}`

    const response = await undiciRequest(requestURL, {
      headers: {
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'user-agent': ua
      },
      maxRedirections: 3,
      autoSelectFamily: true,
      headersTimeout: 15000,
      bodyTimeout: 15000
    })

    // The body is read in both cases: as the error detail or as the result.
    const body = await response.body.text()

    if (response.statusCode > 299) {
      throw new Error(`Fetch HTML error (${response.statusCode}): ${body}`)
    }

    return body
  })
}, {
  name: 'fetch-html',
  dependencies: ['env']
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/**
 * JSON schema fragment for the `createArchive` request property.
 *
 * Accepts one of:
 *  - an object carrying the `url` to archive (required),
 *  - a boolean,
 *  - null.
 */
export const createArchiveProp = {
  createArchive: {
    anyOf: [
      {
        type: 'object',
        properties: {
          url: { type: 'string', format: 'uri' }
        },
        // Must name the declared property (`url`); `uri` is only the format.
        required: [
          'url'
        ]
      },
      {
        type: 'boolean'
      },
      {
        type: 'null'
      }
    ]
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* eslint-disable camelcase */ | ||
import SQL from '@nearform/sql' | ||
|
||
/**
 * Insert a new archive row and return the inserted record.
 *
 * @param {object} params
 * @param {object} params.client - Database client exposing `query(statement)`.
 * @param {string} params.userID - Stored as the row's `owner_id`.
 * @param {string} params.bookmarkId - Stored as the row's `bookmark_id`.
 * @param {string} params.bookmarkTitle - Stored as the row's `title`.
 * @param {string} params.url - Stored as the row's `url`.
 * @returns {Promise<object>} First returned row (`id`, `url`, `title`).
 */
export async function createArchive ({
  client,
  userID,
  bookmarkId,
  bookmarkTitle,
  url
}) {
  const { rows } = await client.query(SQL`
    INSERT INTO archives (owner_id, bookmark_id, url, title)
    VALUES (${userID}, ${bookmarkId}, ${url}, ${bookmarkTitle})
    returning id, url, title;
  `)

  const [insertedArchive] = rows
  return insertedArchive
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/**
 * Route plugin for archives. It currently registers nothing: it awaits an
 * empty list of registrations and resolves.
 *
 * @param {object} fastify - Fastify instance (unused for now).
 * @param {object} opts - Plugin options (unused for now).
 * @returns {Promise<void>}
 */
export default async function archiveRoutes (fastify, opts) {
  const registrations = []
  await Promise.all(registrations)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import SQL from '@nearform/sql' | ||
|
||
/**
 * Extract the article for an archive row and persist the outcome.
 *
 * On success the row matching `archiveID` and `userID` is marked ready and
 * updated with every field present on the extracted article. On failure the
 * error is logged and stored in the row's `error` column; it is not rethrown.
 * If recording the error itself fails, that rejection propagates to the caller.
 *
 * @param {object} params
 * @param {object} params.fastify - Fastify instance providing `extractArchive`, `pg` and `log`.
 * @param {object} [params.pg] - Optional tx client; defaults to `fastify.pg`.
 * @param {object} [params.log] - Optional request logging instance; defaults to `fastify.log`.
 * @param {string} params.userID - Owner of the archive row.
 * @param {string} [params.bookmarkTitle] - Accepted for interface compatibility; not used here.
 * @param {string} params.archiveID - Id of the archive row to update.
 * @param {string} params.url - URL to extract.
 * @param {string} [params.initialHTML] - Already-fetched HTML, forwarded to `extractArchive`.
 * @returns {Promise<void>}
 */
export async function resolveArchive ({
  fastify,
  pg, // optional tx client
  log, // optional request logging instance
  userID,
  bookmarkTitle,
  archiveID,
  url,
  initialHTML
}) {
  const db = pg ?? fastify.pg
  const logger = log ?? fastify.log

  try {
    const article = await fastify.extractArchive({
      url,
      initialHTML
    })

    // Only columns whose source key exists on the article are updated.
    const archiveData = [
      SQL`ready = true`,
      SQL`url = ${url}`
    ]
    if ('title' in article) archiveData.push(SQL`title = ${article.title}`)
    if ('siteName' in article) archiveData.push(SQL`site_name = ${article.siteName}`)
    if ('content' in article) archiveData.push(SQL`html_content = ${article.content}`)
    if ('length' in article) archiveData.push(SQL`length = ${article.length}`)
    if ('excerpt' in article) archiveData.push(SQL`excerpt = ${article.excerpt}`)
    if ('byline' in article) archiveData.push(SQL`byline = ${article.byline}`)
    if ('dir' in article) archiveData.push(SQL`direction = ${article.dir}`)
    if ('lang' in article) archiveData.push(SQL`language = ${article.lang}`)

    const query = SQL`
      update archives
      set ${SQL.glue(archiveData, ' , ')}
      where id = ${archiveID}
      and owner_id =${userID};
    `

    // The update has no returning clause, so there is no result to read.
    await db.query(query)

    logger.info(`Archive ${archiveID} for ${url} is ready.`)
  } catch (err) {
    logger.error(`Error resolving archive ${archiveID}`)
    logger.error(err)

    // Non-Error rejections have no stack; fall back to their string form so
    // the failure is still recorded instead of being stored as NULL.
    const errorText = err?.stack ?? String(err)

    const errorQuery = SQL`
      update archives
      set error = ${errorText}
      where id = ${archiveID}
      and owner_id =${userID};`
    await db.query(errorQuery)
  }
}
Oops, something went wrong.