Skip to content

Commit

Permalink
Add full text search
Browse files Browse the repository at this point in the history
Add full text search for bookmarks, episodes, archives, and users.
  • Loading branch information
bcomnes committed Sep 17, 2023
1 parent 8223c30 commit 02dcc26
Show file tree
Hide file tree
Showing 61 changed files with 1,913 additions and 188 deletions.
2 changes: 1 addition & 1 deletion app.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export const options = {
trustProxy: true,
genReqId: function (req) { return hid() },
logger: {
mixin () {
mixin () { // TODO: move this to the log ingestor somehow?
return {
service: 'breadcrum.net',
ddsource: 'nodejs',
Expand Down
118 changes: 118 additions & 0 deletions migrations/018.do.fts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
-- Bookmarks FTS
-- Weighted full-text document per bookmark: the user's note ranks highest,
-- then the title, then the summary and URL.
alter table bookmarks
add column tsv tsvector;

create index bookmarks_tsv_idx on bookmarks using gin(tsv);

-- Backfill existing rows (runs before the trigger is installed).
update bookmarks set tsv =
setweight(to_tsvector('english', coalesce(note,'')), 'A') ||
setweight(to_tsvector('english', coalesce(title,'')), 'B') ||
setweight(to_tsvector('english', coalesce(summary,'')), 'C') ||
setweight(to_tsvector('english', coalesce(url,'')), 'C');

-- Trigger function keeping tsv in sync on writes.
-- Uses lowercase `new` consistently (the original mixed `new`/`NEW`;
-- Postgres treats them identically, but the file convention is lowercase).
create function bookmarks_tsv_update() returns trigger as $$
begin
new.tsv :=
  setweight(to_tsvector('english', coalesce(new.note,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.title,'')), 'B') ||
  setweight(to_tsvector('english', coalesce(new.summary,'')), 'C') ||
  setweight(to_tsvector('english', coalesce(new.url,'')), 'C');
return new;
end
$$ language plpgsql;

-- `update of <cols>` skips recomputing the tsvector when unrelated
-- columns change, instead of firing on every update.
create trigger bookmarks_tsv_trigger
before insert or update of note, title, summary, url
on bookmarks for each row execute function bookmarks_tsv_update();

-- Archives FTS
-- Weighted full-text document per archive: title ranks highest, then the
-- extracted text content, then the excerpt, with site metadata and URL last.
alter table archives
add column tsv tsvector;

create index archives_tsv_idx on archives using gin(tsv);

-- Backfill existing rows (runs before the trigger is installed).
update archives set tsv =
setweight(to_tsvector('english', coalesce(title,'')), 'A') ||
setweight(to_tsvector('english', coalesce(text_content,'')), 'B') ||
setweight(to_tsvector('english', coalesce(excerpt,'')), 'C') ||
setweight(to_tsvector('english', coalesce(site_name,'')), 'D') ||
setweight(to_tsvector('english', coalesce(byline,'')), 'D') ||
setweight(to_tsvector('english', coalesce(url,'')), 'D');

-- Trigger function keeping tsv in sync on writes.
-- Uses lowercase `new` consistently (the original mixed `new`/`NEW`).
create function archives_tsv_update() returns trigger as $$
begin
new.tsv :=
  setweight(to_tsvector('english', coalesce(new.title,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.text_content,'')), 'B') ||
  setweight(to_tsvector('english', coalesce(new.excerpt,'')), 'C') ||
  setweight(to_tsvector('english', coalesce(new.site_name,'')), 'D') ||
  setweight(to_tsvector('english', coalesce(new.byline,'')), 'D') ||
  setweight(to_tsvector('english', coalesce(new.url,'')), 'D');
return new;
end
$$ language plpgsql;

-- `update of <cols>` skips recomputing the tsvector when unrelated
-- columns change, instead of firing on every update.
create trigger archives_tsv_trigger
before insert or update of title, text_content, excerpt, site_name, byline, url
on archives for each row execute function archives_tsv_update();

-- Episodes FTS
-- Weighted full-text document per episode: title ranks highest, then the
-- text content, then the author, with filename and URL last.
alter table episodes
add column tsv tsvector;

create index episodes_tsv_idx on episodes using gin(tsv);

-- Backfill existing rows (runs before the trigger is installed).
update episodes set tsv =
setweight(to_tsvector('english', coalesce(title,'')), 'A') ||
setweight(to_tsvector('english', coalesce(text_content,'')), 'B') ||
setweight(to_tsvector('english', coalesce(author_name,'')), 'C') ||
setweight(to_tsvector('english', coalesce(filename,'')), 'D') ||
setweight(to_tsvector('english', coalesce(url,'')), 'D');

-- Trigger function keeping tsv in sync on writes.
-- Uses lowercase `new` consistently (the original mixed `new`/`NEW`).
create function episodes_tsv_update() returns trigger as $$
begin
new.tsv :=
  setweight(to_tsvector('english', coalesce(new.title,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.text_content,'')), 'B') ||
  setweight(to_tsvector('english', coalesce(new.author_name,'')), 'C') ||
  setweight(to_tsvector('english', coalesce(new.filename,'')), 'D') ||
  setweight(to_tsvector('english', coalesce(new.url,'')), 'D');
return new;
end
$$ language plpgsql;

-- `update of <cols>` skips recomputing the tsvector when unrelated
-- columns change, instead of firing on every update.
create trigger episodes_tsv_trigger
before insert or update of title, text_content, author_name, filename, url
on episodes for each row execute function episodes_tsv_update();

-- Users FTS
-- Weighted full-text document per user: username and admin notes rank
-- highest, email addresses second.
alter table users
add column tsv tsvector;

create index users_tsv_idx on users using gin(tsv);

-- Backfill existing rows (runs before the trigger is installed).
update users set tsv =
setweight(to_tsvector('english', coalesce(username,'')), 'A') ||
setweight(to_tsvector('english', coalesce(disabled_reason,'')), 'A') ||
setweight(to_tsvector('english', coalesce(internal_note,'')), 'A') ||
setweight(to_tsvector('english', coalesce(email,'')), 'B') ||
setweight(to_tsvector('english', coalesce(pending_email_update,'')), 'B');

-- Trigger function keeping tsv in sync on writes.
-- Uses lowercase `new` consistently (the original mixed `new`/`NEW`).
create function users_tsv_update() returns trigger as $$
begin
new.tsv :=
  setweight(to_tsvector('english', coalesce(new.username,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.disabled_reason,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.internal_note,'')), 'A') ||
  setweight(to_tsvector('english', coalesce(new.email,'')), 'B') ||
  setweight(to_tsvector('english', coalesce(new.pending_email_update,'')), 'B');
return new;
end
$$ language plpgsql;

-- `update of <cols>` skips recomputing the tsvector when unrelated
-- columns change, instead of firing on every update.
create trigger users_tsv_trigger
before insert or update of username, disabled_reason, internal_note, email, pending_email_update
on users for each row execute function users_tsv_update();
23 changes: 23 additions & 0 deletions migrations/018.undo.fts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- FTS down migration: remove all full text search artifacts.
-- Grouped by statement kind rather than by table; triggers are dropped
-- before the functions they call so no cascade is needed, and `if exists`
-- keeps the script idempotent.

-- Drop the sync triggers first (they depend on the functions below).
drop trigger if exists bookmarks_tsv_trigger on bookmarks;
drop trigger if exists archives_tsv_trigger on archives;
drop trigger if exists episodes_tsv_trigger on episodes;
drop trigger if exists users_tsv_trigger on users;

-- Then the trigger functions.
drop function if exists bookmarks_tsv_update();
drop function if exists archives_tsv_update();
drop function if exists episodes_tsv_update();
drop function if exists users_tsv_update();

-- Then the GIN indexes.
drop index if exists bookmarks_tsv_idx;
drop index if exists archives_tsv_idx;
drop index if exists episodes_tsv_idx;
drop index if exists users_tsv_idx;

-- Finally the tsvector columns themselves.
alter table bookmarks drop column if exists tsv;
alter table archives drop column if exists tsv;
alter table episodes drop column if exists tsv;
alter table users drop column if exists tsv;
6 changes: 5 additions & 1 deletion plugins/helmet.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@ import fp from 'fastify-plugin'
export default fp(async function (fastify, opts) {
// This is also customized in the ./static.js plugin
fastify.register(import('@fastify/helmet'), {
crossOriginResourcePolicy: { policy: 'cross-origin' },
crossOriginEmbedderPolicy: { policy: 'credentialless' },
contentSecurityPolicy: {
directives: {
'upgrade-insecure-requests': fastify.config.ENV !== 'production' ? null : []
'upgrade-insecure-requests': fastify.config.ENV !== 'production' ? null : [],
'media-src': '*',
'img-src': '*'
}
}
})
Expand Down
50 changes: 0 additions & 50 deletions plugins/static.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,56 +36,6 @@ export default fp(async function (fastify, opts) {
...staticOpts
})
}, { prefix: '/admin' })

// Feed Routes modified CSP
const upgradeInsecureRequests = fastify.config.ENV !== 'production' ? null : []

const episodesHeaders = {
contentSecurityPolicy: {
directives: {
'media-src': '*',
'img-src': '*',
'upgrade-insecure-requests': upgradeInsecureRequests
}
}
}
fastify.register(async function (fastify, opts) {
fastify.register(import('@fastify/helmet'), episodesHeaders)
fastify.register(import('@fastify/static'), {
root: path.join(__dirname, '../public/feeds'),
prefix: '/',
...staticOpts
})
}, { prefix: '/feeds' })
// Episodes routes
fastify.register(async function (fastify, opts) {
fastify.register(import('@fastify/helmet'), episodesHeaders)
fastify.register(import('@fastify/static'), {
root: path.join(__dirname, '../public/episodes'),
prefix: '/',
...staticOpts
})
}, { prefix: '/episodes' })

// Archives Routes Modified CSP, COEP, CORP
fastify.register(async function (fastify, opts) {
fastify.register(import('@fastify/helmet'), {
crossOriginResourcePolicy: { policy: 'cross-origin' },
crossOriginEmbedderPolicy: { policy: 'credentialless' },
contentSecurityPolicy: {
directives: {
'media-src': '*',
'img-src': '*',
'upgrade-insecure-requests': upgradeInsecureRequests
}
}
})
fastify.register(import('@fastify/static'), {
root: path.join(__dirname, '../public/archives/view'),
prefix: '/',
...staticOpts
})
}, { prefix: '/archives/view' })
}, {
name: 'static',
dependencies: ['compress', 'auth', 'jwt', 'helmet']
Expand Down
5 changes: 5 additions & 0 deletions routes/api/admin/search/users/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
export default async function searchAdminUsersRoutes (fastify, opts) {
  // Placeholder: no admin user-search routes are registered yet.
  // Route registration promises will be collected here and awaited together.
  const registrations = []
  await Promise.all(registrations)
}
73 changes: 63 additions & 10 deletions routes/api/archives/archive-query-get.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,48 @@
import SQL from '@nearform/sql'

export function getArchivesQuery ({
/**
* @typedef {import('@nearform/sql').SqlStatement} SqlStatement
*/

/**
* Generates an SQL query for fetching archive properties based on various filters.
*
* @export
* @param {Object} options - The options object containing filter and query properties.
* @param {boolean} [options.fullArchives] - Whether to include full HTML content of the archive.
* @param {string|number} options.ownerId - The owner ID to filter the archives and bookmarks.
* @param {boolean} [options.sensitive] - Whether to include sensitive bookmarks.
* @param {boolean} [options.toread] - Whether to include bookmarks marked "to read."
* @param {boolean} [options.starred] - Whether to include starred bookmarks.
* @param {boolean} [options.ready] - Whether the archive is ready.
* @param {string|number} [options.archiveId] - Specific archive ID to fetch.
* @param {string|number} [options.bookmarkId] - Specific bookmark ID associated with an archive to fetch.
* @param {string|number} [options.before] - Timestamp to fetch archives created before this time.
 * @param {string} [options.query] - Text search query used for ranking.
 * @param {boolean} [options.includeRank] - Whether to include a rank column computed from the text search query.
*
* @returns {SqlStatement} The generated SQL query.
*/
export function archivePropsQuery ({
fullArchives,
ownerId,
archiveId,
bookmarkId,
before,
sensitive,
toread,
starred,
ready,
perPage,
fullArchives
archiveId,
bookmarkId,
before,
query,
includeRank
}) {
const archivesQuery = SQL`
return SQL`
select
ar.id,
ar.created_at,
ar.updated_at,
${includeRank ? SQL`ts_rank(ar.tsv, websearch_to_tsquery('english', ${query})) AS rank,` : SQL``}
ar.url,
ar.title,
coalesce (ar.title, bm.title) as display_title,
Expand Down Expand Up @@ -47,13 +73,40 @@ export function getArchivesQuery ({
on ar.bookmark_id = bm.id
where ar.owner_id = ${ownerId}
and bm.owner_id = ${ownerId}
${archiveId ? SQL`and ar.id = ${archiveId}` : SQL``}
${bookmarkId ? SQL`and ar.bookmark_id = ${bookmarkId}` : SQL``}
${before ? SQL`and ar.created_at < ${before}` : SQL``}
${!sensitive ? SQL`and sensitive = false` : SQL``}
${toread ? SQL`and toread = true` : SQL``}
${starred ? SQL`and starred = true` : SQL``}
${ready != null ? SQL`and ready = ${ready}` : SQL``}
${archiveId ? SQL`and ar.id = ${archiveId}` : SQL``}
${bookmarkId ? SQL`and ar.bookmark_id = ${bookmarkId}` : SQL``}
${before ? SQL`and ar.created_at < ${before}` : SQL``}
`
}

export function getArchivesQuery ({
ownerId,
archiveId,
bookmarkId,
before,
sensitive,
toread,
starred,
ready,
perPage,
fullArchives
}) {
const archivesQuery = SQL`
${archivePropsQuery({
fullArchives,
ownerId,
sensitive,
toread,
starred,
ready,
archiveId,
bookmarkId,
before
})}
order by ar.created_at desc, ar.url desc, bm.title desc
${perPage != null ? SQL`fetch first ${perPage} rows only` : SQL``}
`
Expand Down

0 comments on commit 02dcc26

Please sign in to comment.