From eafd288c6c852752478da14d57ec05d298b58632 Mon Sep 17 00:00:00 2001 From: Andrey Sitnik Date: Sat, 13 Apr 2024 23:01:33 +0200 Subject: [PATCH] Refactor URL suggestion --- core/loader/atom.ts | 22 +++++++++--------- core/loader/index.ts | 7 ++---- core/loader/rss.ts | 21 ++++++++--------- core/loader/utils.ts | 42 ++++++++++------------------------ core/preview.ts | 18 +++++++++++---- core/test/loader/atom.test.ts | 43 +++++------------------------------ core/test/loader/rss.test.ts | 40 ++++---------------------------- core/test/preview.test.ts | 18 ++++++++------- 8 files changed, 70 insertions(+), 141 deletions(-) diff --git a/core/loader/atom.ts b/core/loader/atom.ts index 57e2e8c6..b581845b 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -2,7 +2,7 @@ import type { TextResponse } from '../download.js' import type { OriginPost } from '../post.js' import { createPostsPage } from '../posts-page.js' import type { Loader } from './index.js' -import { findLinks, hasAnyFeed, toTime } from './utils.js' +import { findAnchorHrefs, findLinksByType, toTime } from './utils.js' function parsePosts(text: TextResponse): OriginPost[] { let document = text.parse() @@ -26,16 +26,11 @@ function parsePosts(text: TextResponse): OriginPost[] { } export const atom: Loader = { - getMineLinksFromText(text, found) { - let links = findLinks(text, 'application/atom+xml', /feed\.|\.atom|\/atom/i) - if (links.length > 0) { - return links - } else if (!hasAnyFeed(text, found)) { - let { origin } = new URL(text.url) - return [new URL('/feed', origin).href, new URL('/atom', origin).href] - } else { - return [] - } + getMineLinksFromText(text) { + return [ + ...findLinksByType(text, 'application/atom+xml'), + ...findAnchorHrefs(text, /feed\.|\.atom|\/atom/i) + ] }, getPosts(task, url, text) { @@ -48,6 +43,11 @@ export const atom: Loader = { } }, + getSuggestedLinksFromText(text) { + let { origin } = new URL(text.url) + return [new URL('/feed', origin).href, new URL('/atom', origin).href] + }, + isMineText(text) { let document = text.parse() if (document.firstElementChild?.nodeName === 'feed') { diff --git a/core/loader/index.ts b/core/loader/index.ts index 28f1e40b..bf334a49 100644 --- a/core/loader/index.ts +++ b/core/loader/index.ts @@ -1,15 +1,12 @@ import type { DownloadTask, TextResponse } from '../download.js' import type { PostsPage } from '../posts-page.js' -import type { PreviewCandidate } from '../preview.js' import { atom } from './atom.js' import { rss } from './rss.js' export type Loader = { - getMineLinksFromText( - response: TextResponse, - found: PreviewCandidate[] - ): string[] + getMineLinksFromText(response: TextResponse): string[] getPosts(task: DownloadTask, url: string, text?: TextResponse): PostsPage + getSuggestedLinksFromText(response: TextResponse): string[] isMineText(response: TextResponse): false | string isMineUrl(url: URL): false | string | undefined } diff --git a/core/loader/rss.ts b/core/loader/rss.ts index b7178780..7e4c12f7 100644 --- a/core/loader/rss.ts +++ b/core/loader/rss.ts @@ -2,7 +2,7 @@ import type { TextResponse } from '../download.js' import type { OriginPost } from '../post.js' import { createPostsPage } from '../posts-page.js' import type { Loader } from './index.js' -import { findLinks, hasAnyFeed, toTime } from './utils.js' +import { findAnchorHrefs, findLinksByType, toTime } from './utils.js' function parsePosts(text: TextResponse): OriginPost[] { let document = text.parse() @@ -25,16 +25,11 @@ function parsePosts(text: TextResponse): OriginPost[] { } export const rss: Loader = { - getMineLinksFromText(text, found) { - let links = findLinks(text, 'application/rss+xml', /\.rss|\/rss/i) - if (links.length > 0) { - return links - } else if (!hasAnyFeed(text, found)) { - let { origin } = new URL(text.url) - return [new URL('/rss', origin).href] - } else { - return [] - } + getMineLinksFromText(text) { + return [ + ...findLinksByType(text, 'application/rss+xml'), + ...findAnchorHrefs(text, /\.rss|\/rss/i) + ] }, getPosts(task, url, text) { @@ -47,6 +42,10 @@ export const rss: Loader = { } }, + getSuggestedLinksFromText(text) { + return [new URL('/rss', new URL(text.url).origin).href] + }, + isMineText(text) { let document = text.parse() if (document.firstElementChild?.nodeName === 'rss') { diff --git a/core/loader/utils.ts b/core/loader/utils.ts index f97012c8..51e903a6 100644 --- a/core/loader/utils.ts +++ b/core/loader/utils.ts @@ -1,5 +1,4 @@ import type { TextResponse } from '../download.js' -import type { PreviewCandidate } from '../preview.js' export function isString(attr: null | string): attr is string { return typeof attr === 'string' && attr.length > 0 @@ -27,44 +26,27 @@ function buildFullURL( ) } -export function findLinks( - text: TextResponse, - type: string, - hrefPattern: RegExp -): string[] { - let links = [...text.parse().querySelectorAll('link')] +export function findLinksByType(text: TextResponse, type: string): string[] { + return [...text.parse().querySelectorAll('link')] .filter( link => link.getAttribute('type') === type && isString(link.getAttribute('href')) ) .map(link => buildFullURL(link, text.url)) - - links.push( - ...[...text.parse().querySelectorAll('a')] - .filter(a => { - let href = a.getAttribute('href') - if (!href) return false - return hrefPattern.test(href) - }) - .map(a => buildFullURL(a, text.url)) - ) - - return links } -export function hasAnyFeed( +export function findAnchorHrefs( text: TextResponse, - found: PreviewCandidate[] -): boolean { - return ( - findLinks(text, 'application/atom+xml', /feed\.|\.atom|\/atom/i).length > - 0 || - findLinks(text, 'application/rss+xml', /\.rss|\/rss/i).length > 0 || - // TODO: Replace when we will have more loaders - // found.some(i => i.loader === 'rss' || i.loader === 'atom') - found.length > 0 - ) + hrefPattern: RegExp +): string[] { + return [...text.parse().querySelectorAll('a')] + .filter(a => { + let href = a.getAttribute('href') + if (!href) return false + return hrefPattern.test(href) + }) + .map(a => buildFullURL(a, text.url)) } export function toTime(date: null | string | undefined): number | undefined { diff --git a/core/preview.ts b/core/preview.ts index e37fb2d3..33970e79 100644 --- a/core/preview.ts +++ b/core/preview.ts @@ -157,9 +157,14 @@ export function getLoaderForText( function getLinksFromText(response: TextResponse): string[] { let names = Object.keys(loaders) as LoaderName[] return names.reduce((links, name) => { - return links.concat( - loaders[name].getMineLinksFromText(response, $candidates.get()) - ) + return links.concat(loaders[name].getMineLinksFromText(response)) + }, []) +} + +function getSuggestedLinksFromText(response: TextResponse): string[] { + let names = Object.keys(loaders) as LoaderName[] + return names.reduce((links, name) => { + return links.concat(loaders[name].getSuggestedLinksFromText(response)) }, []) } @@ -211,7 +216,12 @@ export async function addLink(url: string, deep = false): Promise { } if (!deep) { let links = getLinksFromText(response) - await Promise.all(links.map(i => addLink(i, true))) + if (links.length > 0) { + await Promise.all(links.map(i => addLink(i, true))) + } else if ($candidates.get().length === 0) { + let suggested = getSuggestedLinksFromText(response) + await Promise.all(suggested.map(i => addLink(i, true))) + } } if (byText === false) { $links.setKey(url, { state: 'unknown' }) diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index 07ed9e7d..ed2715c1 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -69,8 +69,7 @@ test('detects xml:base attribute', () => { { url: 'http://example.com' } - ), - [] + ) ), [ 'http://example.com/today/1.xml', @@ -105,8 +104,7 @@ test('detects links', () => { { url: 'https://example.com/news/' } - ), - [] + ) ), [ 'https://example.com/a', @@ -133,8 +131,7 @@ test('finds atom links in elements', () => { { url: 'https://example.com/news' } - ), - [] + ) ), [ 'https://example.com/atom', @@ -147,40 +144,12 @@ test('finds atom links in elements', () => { test('returns default links', () => { deepStrictEqual( - loaders.atom.getMineLinksFromText( + loaders.atom.getSuggestedLinksFromText( createTextResponse('', { url: 'https://example.com/news/' - }), - [] - ), - ['https://example.com/atom'] - ) -}) - -test('ignores default URL on RSS link', () => { - deepStrictEqual( - loaders.atom.getMineLinksFromText( - createTextResponse( - ` - - - - - ` - ), - [] - ), - [] - ) - deepStrictEqual( - loaders.atom.getMineLinksFromText( - createTextResponse( - ` - ` - ), - [{ loader: 'rss', title: 'RSS', url: 'https://example.com/rss' }] + }) ), - [] + ['https://example.com/feed', 'https://example.com/atom'] ) }) diff --git a/core/test/loader/rss.test.ts b/core/test/loader/rss.test.ts index 62b05249..36d91201 100644 --- a/core/test/loader/rss.test.ts +++ b/core/test/loader/rss.test.ts @@ -39,8 +39,7 @@ test('detects links', () => { { url: 'https://example.com/news/' } - ), - [] + ) ), [ 'https://example.com/a', @@ -65,8 +64,7 @@ test('finds rss links in elements', () => { { url: 'https://example.com/news' } - ), - [] + ) ), ['https://example.com/rss', 'https://example.com/something.rss'] ) @@ -74,40 +72,12 @@ test('finds rss links in elements', () => { test('returns default links', () => { deepStrictEqual( - loaders.rss.getMineLinksFromText( + loaders.rss.getSuggestedLinksFromText( createTextResponse('', { url: 'https://example.com/news/' - }), - [] - ), - ['https://example.com/feed', 'https://example.com/rss'] - ) -}) - -test('ignores default URL on Atom link', () => { - deepStrictEqual( - loaders.rss.getMineLinksFromText( - createTextResponse( - ` - - - - - ` - ), - [] - ), - [] - ) - deepStrictEqual( - loaders.atom.getMineLinksFromText( - createTextResponse( - ` - ` - ), - [{ loader: 'atom', title: 'Atom', url: 'https://example.com/atom' }] + }) ), - [] + ['https://example.com/rss'] ) }) diff --git a/core/test/preview.test.ts b/core/test/preview.test.ts index 9edb3eb4..c8f5e63a 100644 --- a/core/test/preview.test.ts +++ b/core/test/preview.test.ts @@ -87,6 +87,8 @@ test('uses HTTPS for specific domains', async () => { keepMount(previewCandidates) spyOn(loaders.rss, 'getMineLinksFromText', () => []) spyOn(loaders.atom, 'getMineLinksFromText', () => []) + spyOn(loaders.rss, 'getSuggestedLinksFromText', () => []) + spyOn(loaders.atom, 'getSuggestedLinksFromText', () => []) expectRequest('https://twitter.com/blog').andRespond(200, '') setPreviewUrl('twitter.com/blog') @@ -232,8 +234,8 @@ test('looks for popular RSS and Atom places', async () => { expectRequest('http://example.com').andRespond(200, 'Nothing') let atom = '' - expectRequest('http://example.com/atom').andRespond(200, atom, 'text/xml') expectRequest('http://example.com/feed').andRespond(404) + expectRequest('http://example.com/atom').andRespond(200, atom, 'text/xml') expectRequest('http://example.com/rss').andRespond(404) setPreviewUrl('example.com') @@ -256,8 +258,8 @@ test('shows if unknown URL', async () => { keepMount(previewCandidates) expectRequest('http://example.com').andRespond(200, 'Nothing') - expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/feed').andRespond(404) + expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/rss').andRespond(404) setPreviewUrl('example.com') @@ -272,12 +274,12 @@ test('shows if unknown URL', async () => { test('always keep the same order of candidates', async () => { keepMount(previewCandidates) expectRequest('http://example.com').andRespond(200, 'Nothing') + expectRequest('http://example.com/feed').andRespond(404) expectRequest('http://example.com/atom').andRespond( 200, 'Atom', 'application/rss+xml' ) - expectRequest('http://example.com/feed').andRespond(404) expectRequest('http://example.com/rss').andRespond( 200, 'RSS', @@ -293,8 +295,8 @@ test('always keep the same order of candidates', async () => { clearPreview() expectRequest('http://example.com').andRespond(200, 'Nothing') - let atom = expectRequest('http://example.com/atom').andWait() expectRequest('http://example.com/feed').andRespond(404) + let atom = expectRequest('http://example.com/atom').andWait() expectRequest('http://example.com/rss').andRespond( 200, 'RSS', @@ -320,12 +322,12 @@ test('tracks current candidate', async () => { let getRssPosts = spyOn(loaders.rss, 'getPosts') expectRequest('http://example.com').andRespond(200, 'Nothing') + expectRequest('http://example.com/feed').andRespond(404) expectRequest('http://example.com/atom').andRespond( 200, 'Atom', 'application/rss+xml' ) - expectRequest('http://example.com/feed').andRespond(404) expectRequest('http://example.com/rss').andRespond( 200, 'RSS', @@ -488,8 +490,8 @@ test('changes URL during typing in the field', async () => { equal(previewUrl.get(), '') expectRequest('http://example.com').andRespond(200, 'Nothing') - expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/feed').andRespond(404) + expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/rss').andRespond(404) setPreviewUrl('example.com') equal(previewUrl.get(), 'http://example.com') @@ -502,16 +504,16 @@ test('changes URL during typing in the field', async () => { equal(previewUrl.get(), 'http://example.com') expectRequest('http://other.net').andRespond(200, 'Nothing') - expectRequest('http://other.net/atom').andRespond(404) expectRequest('http://other.net/feed').andRespond(404) + expectRequest('http://other.net/atom').andRespond(404) expectRequest('http://other.net/rss').andRespond(404) onPreviewUrlType('other.net') await setTimeout(500) equal(previewUrl.get(), 'http://other.net') expectRequest('http://example.com').andRespond(200, 'Nothing') - expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/feed').andRespond(404) + expectRequest('http://example.com/atom').andRespond(404) expectRequest('http://example.com/rss').andRespond(404) onPreviewUrlType('other.net/some') setPreviewUrl('example.com')