Skip to content

Commit

Permalink
Refactor URL suggestion
Browse files Browse the repository at this point in the history
  • Loading branch information
ai committed Apr 13, 2024
1 parent 5e33267 commit eafd288
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 141 deletions.
22 changes: 11 additions & 11 deletions core/loader/atom.ts
Expand Up @@ -2,7 +2,7 @@ import type { TextResponse } from '../download.js'
import type { OriginPost } from '../post.js'
import { createPostsPage } from '../posts-page.js'
import type { Loader } from './index.js'
import { findLinks, hasAnyFeed, toTime } from './utils.js'
import { findAnchorHrefs, findLinksByType, toTime } from './utils.js'

function parsePosts(text: TextResponse): OriginPost[] {
let document = text.parse()
Expand All @@ -26,16 +26,11 @@ function parsePosts(text: TextResponse): OriginPost[] {
}

export const atom: Loader = {
getMineLinksFromText(text, found) {
let links = findLinks(text, 'application/atom+xml', /feed\.|\.atom|\/atom/i)
if (links.length > 0) {
return links
} else if (!hasAnyFeed(text, found)) {
let { origin } = new URL(text.url)
return [new URL('/feed', origin).href, new URL('/atom', origin).href]
} else {
return []
}
/**
 * Collects Atom feed URLs referenced by the page: first `<link>` elements
 * whose type is `application/atom+xml`, then `<a>` elements whose href
 * matches the Atom-looking pattern.
 */
getMineLinksFromText(text) {
  let typedLinks = findLinksByType(text, 'application/atom+xml')
  let anchorLinks = findAnchorHrefs(text, /feed\.|\.atom|\/atom/i)
  return typedLinks.concat(anchorLinks)
},

getPosts(task, url, text) {
Expand All @@ -48,6 +43,11 @@ export const atom: Loader = {
}
},

/**
 * Builds the conventional Atom feed locations (`/feed` and `/atom`) on the
 * origin of the page that was downloaded.
 */
getSuggestedLinksFromText(text) {
  let base = new URL(text.url).origin
  let feedUrl = new URL('/feed', base)
  let atomUrl = new URL('/atom', base)
  return [feedUrl.href, atomUrl.href]
},

isMineText(text) {
let document = text.parse()
if (document.firstElementChild?.nodeName === 'feed') {
Expand Down
7 changes: 2 additions & 5 deletions core/loader/index.ts
@@ -1,15 +1,12 @@
import type { DownloadTask, TextResponse } from '../download.js'
import type { PostsPage } from '../posts-page.js'
import type { PreviewCandidate } from '../preview.js'
import { atom } from './atom.js'
import { rss } from './rss.js'

/**
 * Contract implemented by each feed loader (the `atom` and `rss` objects are
 * declared with this type).
 */
export type Loader = {
// NOTE(review): two signatures for getMineLinksFromText are listed here, one
// taking an extra `found` list and one taking only the response. The visible
// atom/rss implementations accept only the response — confirm which one is
// meant to remain.
getMineLinksFromText(
response: TextResponse,
found: PreviewCandidate[]
): string[]
getMineLinksFromText(response: TextResponse): string[]
// Produces the posts page for a URL; `text` is optional.
getPosts(task: DownloadTask, url: string, text?: TextResponse): PostsPage
// Returns URLs to try for the response. The visible implementations build
// fixed paths (`/feed`, `/atom`, `/rss`) on the origin of the response URL.
getSuggestedLinksFromText(response: TextResponse): string[]
// Returns `false` or a string. NOTE(review): the meaning of the string is not
// visible here — presumably a title for the detected feed; confirm.
isMineText(response: TextResponse): false | string
// Returns `false`, a string, or `undefined` for a URL; the semantics of each
// result are not visible in this view.
isMineUrl(url: URL): false | string | undefined
}
Expand Down
21 changes: 10 additions & 11 deletions core/loader/rss.ts
Expand Up @@ -2,7 +2,7 @@ import type { TextResponse } from '../download.js'
import type { OriginPost } from '../post.js'
import { createPostsPage } from '../posts-page.js'
import type { Loader } from './index.js'
import { findLinks, hasAnyFeed, toTime } from './utils.js'
import { findAnchorHrefs, findLinksByType, toTime } from './utils.js'

function parsePosts(text: TextResponse): OriginPost[] {
let document = text.parse()
Expand All @@ -25,16 +25,11 @@ function parsePosts(text: TextResponse): OriginPost[] {
}

export const rss: Loader = {
getMineLinksFromText(text, found) {
let links = findLinks(text, 'application/rss+xml', /\.rss|\/rss/i)
if (links.length > 0) {
return links
} else if (!hasAnyFeed(text, found)) {
let { origin } = new URL(text.url)
return [new URL('/rss', origin).href]
} else {
return []
}
/**
 * Collects RSS feed URLs referenced by the page: first `<link>` elements
 * whose type is `application/rss+xml`, then `<a>` elements whose href
 * matches the RSS-looking pattern.
 */
getMineLinksFromText(text) {
  let typedLinks = findLinksByType(text, 'application/rss+xml')
  let anchorLinks = findAnchorHrefs(text, /\.rss|\/rss/i)
  return typedLinks.concat(anchorLinks)
},

getPosts(task, url, text) {
Expand All @@ -47,6 +42,10 @@ export const rss: Loader = {
}
},

/**
 * Builds the conventional RSS feed location (`/rss`) on the origin of the
 * page that was downloaded.
 */
getSuggestedLinksFromText(text) {
  let { origin } = new URL(text.url)
  let rssUrl = new URL('/rss', origin)
  return [rssUrl.href]
},

isMineText(text) {
let document = text.parse()
if (document.firstElementChild?.nodeName === 'rss') {
Expand Down
42 changes: 12 additions & 30 deletions core/loader/utils.ts
@@ -1,5 +1,4 @@
import type { TextResponse } from '../download.js'
import type { PreviewCandidate } from '../preview.js'

export function isString(attr: null | string): attr is string {
return typeof attr === 'string' && attr.length > 0
Expand Down Expand Up @@ -27,44 +26,27 @@ function buildFullURL(
)
}

export function findLinks(
text: TextResponse,
type: string,
hrefPattern: RegExp
): string[] {
let links = [...text.parse().querySelectorAll('link')]
export function findLinksByType(text: TextResponse, type: string): string[] {
return [...text.parse().querySelectorAll('link')]
.filter(
link =>
link.getAttribute('type') === type &&
isString(link.getAttribute('href'))
)
.map(link => buildFullURL(link, text.url))

links.push(
...[...text.parse().querySelectorAll('a')]
.filter(a => {
let href = a.getAttribute('href')
if (!href) return false
return hrefPattern.test(href)
})
.map(a => buildFullURL(a, text.url))
)

return links
}

export function hasAnyFeed(
export function findAnchorHrefs(
text: TextResponse,
found: PreviewCandidate[]
): boolean {
return (
findLinks(text, 'application/atom+xml', /feed\.|\.atom|\/atom/i).length >
0 ||
findLinks(text, 'application/rss+xml', /\.rss|\/rss/i).length > 0 ||
// TODO: Replace when we will have more loaders
// found.some(i => i.loader === 'rss' || i.loader === 'atom')
found.length > 0
)
hrefPattern: RegExp
): string[] {
return [...text.parse().querySelectorAll('a')]
.filter(a => {
let href = a.getAttribute('href')
if (!href) return false
return hrefPattern.test(href)
})
.map(a => buildFullURL(a, text.url))
}

export function toTime(date: null | string | undefined): number | undefined {
Expand Down
18 changes: 14 additions & 4 deletions core/preview.ts
Expand Up @@ -157,9 +157,14 @@ export function getLoaderForText(
/**
 * Asks every registered loader for the feed links it recognises in the
 * response and returns them as one flat list, in loader key order.
 */
function getLinksFromText(response: TextResponse): string[] {
let names = Object.keys(loaders) as LoaderName[]
return names.reduce<string[]>((links, name) => {
// NOTE(review): two consecutive `return` statements follow. Read literally,
// only the first (which also passes the current candidates) would execute.
// This looks like the old and new forms of the same line shown together —
// confirm which one is intended.
return links.concat(
loaders[name].getMineLinksFromText(response, $candidates.get())
)
return links.concat(loaders[name].getMineLinksFromText(response))
}, [])
}

/**
 * Gathers the fallback feed URLs proposed by every registered loader for the
 * given response, as one flat list in loader key order.
 */
function getSuggestedLinksFromText(response: TextResponse): string[] {
  let suggested: string[] = []
  for (let name of Object.keys(loaders) as LoaderName[]) {
    suggested = suggested.concat(
      loaders[name].getSuggestedLinksFromText(response)
    )
  }
  return suggested
}

Expand Down Expand Up @@ -211,7 +216,12 @@ export async function addLink(url: string, deep = false): Promise<void> {
}
if (!deep) {
let links = getLinksFromText(response)
await Promise.all(links.map(i => addLink(i, true)))
if (links.length > 0) {
await Promise.all(links.map(i => addLink(i, true)))
} else if ($candidates.get().length === 0) {
let suggested = getSuggestedLinksFromText(response)
await Promise.all(suggested.map(i => addLink(i, true)))
}
}
if (byText === false) {
$links.setKey(url, { state: 'unknown' })
Expand Down
43 changes: 6 additions & 37 deletions core/test/loader/atom.test.ts
Expand Up @@ -69,8 +69,7 @@ test('detects xml:base attribute', () => {
{
url: 'http://example.com'
}
),
[]
)
),
[
'http://example.com/today/1.xml',
Expand Down Expand Up @@ -105,8 +104,7 @@ test('detects links', () => {
{
url: 'https://example.com/news/'
}
),
[]
)
),
[
'https://example.com/a',
Expand All @@ -133,8 +131,7 @@ test('finds atom links in <a> elements', () => {
{
url: 'https://example.com/news'
}
),
[]
)
),
[
'https://example.com/atom',
Expand All @@ -147,40 +144,12 @@ test('finds atom links in <a> elements', () => {

test('returns default links', () => {
deepStrictEqual(
loaders.atom.getMineLinksFromText(
loaders.atom.getSuggestedLinksFromText(
createTextResponse('<!DOCTYPE html><html><head></head></html>', {
url: 'https://example.com/news/'
}),
[]
),
['https://example.com/atom']
)
})

test('ignores default URL on RSS link', () => {
deepStrictEqual(
loaders.atom.getMineLinksFromText(
createTextResponse(
`<!DOCTYPE html>
<html>
<head>
<link rel="alternate" type="application/rss+xml" href="/rss">
</head>
</html>`
),
[]
),
[]
)
deepStrictEqual(
loaders.atom.getMineLinksFromText(
createTextResponse(
`<!DOCTYPE html>
<html></html>`
),
[{ loader: 'rss', title: 'RSS', url: 'https://example.com/rss' }]
})
),
[]
['https://example.com/feed', 'https://example.com/atom']
)
})

Expand Down
40 changes: 5 additions & 35 deletions core/test/loader/rss.test.ts
Expand Up @@ -39,8 +39,7 @@ test('detects links', () => {
{
url: 'https://example.com/news/'
}
),
[]
)
),
[
'https://example.com/a',
Expand All @@ -65,49 +64,20 @@ test('finds rss links in <a> elements', () => {
{
url: 'https://example.com/news'
}
),
[]
)
),
['https://example.com/rss', 'https://example.com/something.rss']
)
})

test('returns default links', () => {
deepStrictEqual(
loaders.rss.getMineLinksFromText(
loaders.rss.getSuggestedLinksFromText(
createTextResponse('<!DOCTYPE html><html><head></head></html>', {
url: 'https://example.com/news/'
}),
[]
),
['https://example.com/feed', 'https://example.com/rss']
)
})

test('ignores default URL on Atom link', () => {
deepStrictEqual(
loaders.rss.getMineLinksFromText(
createTextResponse(
`<!DOCTYPE html>
<html>
<head>
<link rel="alternate" type="application/atom+xml" href="/atom">
</head>
</html>`
),
[]
),
[]
)
deepStrictEqual(
loaders.atom.getMineLinksFromText(
createTextResponse(
`<!DOCTYPE html>
<html></html>`
),
[{ loader: 'atom', title: 'Atom', url: 'https://example.com/atom' }]
})
),
[]
['https://example.com/rss']
)
})

Expand Down

0 comments on commit eafd288

Please sign in to comment.