diff --git a/src/lib/__tests__/useGetResourceLinks.test.ts b/src/lib/__tests__/useGetResourceLinks.test.ts index b360e8990..fcb04177a 100644 --- a/src/lib/__tests__/useGetResourceLinks.test.ts +++ b/src/lib/__tests__/useGetResourceLinks.test.ts @@ -2,28 +2,51 @@ import { beforeEach, describe, expect, test, vi } from 'vitest'; import { fetchUrl, transformUrl } from '../useGetResourceLinks'; const htmlWithLinks = ` - - - arXiv - DOI - HTML - CSS - NASA - PDF - HTML Duplicate + +
+
+ +
+
+
+

links for 2023ApJ

+
+ + + + +
+
+ + `; -const SKIP_URLS = [ - 'http://www.cfa.harvard.edu/sao', - 'https://www.cfa.harvard.edu/', - 'http://www.si.edu', - 'http://www.nasa.gov', -]; - const expectedUrls = [ - { type: 'arXiv', url: 'https://arxiv.org/pdf/1234.5678.pdf' }, - { type: 'DOI', url: 'https://doi.org/10.1234/abcd' }, - { type: 'HTML', url: 'https://example.com/page.html' }, + { type: 'arXiv', url: 'https://arxiv.org/abs/2310.03851' }, + { type: 'arXiv', url: 'https://arxiv.org/pdf/2310.03851' }, + { type: 'DOI', url: 'https://doi.org/10.3847/1538-4357/acffbd' }, { type: 'PDF', url: 'https://example.com/document.pdf' }, ]; @@ -38,12 +61,6 @@ describe('resourceLinks', () => { expect(transformUrl('https://example.com/script.js')).toBeNull(); }); - test('transformUrl filters known skipped domains', () => { - for (const url of SKIP_URLS) { - expect(transformUrl(url)).toBeNull(); - } - }); - test('transformUrl assigns correct type', () => { expect(transformUrl('https://arxiv.org/pdf/foo.pdf')).toEqual({ type: 'arXiv', @@ -81,6 +98,8 @@ describe('resourceLinks', () => { test('fetchUrl returns deduplicated transformed links', async () => { const mockFetch = global.fetch as unknown as ReturnType; mockFetch.mockResolvedValueOnce({ + ok: true, + redirected: false, text: () => Promise.resolve(htmlWithLinks), }); @@ -91,7 +110,9 @@ describe('resourceLinks', () => { test('fetchUrl returns empty list if input has no valid links', async () => { const mockFetch = global.fetch as unknown as ReturnType; mockFetch.mockResolvedValueOnce({ - text: () => Promise.resolve('

No links here

'), + ok: true, + redirected: false, + text: () => Promise.resolve('
'), }); const result = await fetchUrl('fake-id'); @@ -104,14 +125,14 @@ describe('Redirected response', () => { vi.resetAllMocks(); global.fetch = vi.fn(); }); - test('fetchUrl handles 302 redirect and uses Location header', async () => { + + test('fetchUrl detects browser-followed redirect via res.redirected', async () => { const mockFetch = global.fetch as unknown as ReturnType; mockFetch.mockResolvedValueOnce({ - status: 302, - headers: { - get: (name: string) => (name === 'Location' ? 'https://doi.org/10.1234/foo' : null), - }, - text: () => Promise.resolve(''), // not used in redirect + ok: true, + redirected: true, + url: 'https://doi.org/10.1234/foo', + text: () => Promise.resolve(''), }); const result = await fetchUrl('test-id'); @@ -124,18 +145,51 @@ describe('Redirected response', () => { ]); }); - test('fetchUrl returns empty if redirect has no Location', async () => { + test('fetchUrl returns empty if redirected URL is not valid', async () => { const mockFetch = global.fetch as unknown as ReturnType; mockFetch.mockResolvedValueOnce({ - status: 302, - headers: { - get: (() => null) as (name: string) => string | null, - }, + ok: true, + redirected: true, + url: '', text: () => Promise.resolve(''), }); const result = await fetchUrl('test-id'); + expect(result).toEqual([]); + }); +}); + +describe('Error responses', () => { + beforeEach(() => { + vi.resetAllMocks(); + global.fetch = vi.fn(); + }); + + test('fetchUrl returns empty list on 404', async () => { + const mockFetch = global.fetch as unknown as ReturnType; + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 404, + text: () => + Promise.resolve( + '

The requested resource does not exist

' + + '', + ), + }); + + const result = await fetchUrl('bad-bibcode'); + expect(result).toEqual([]); + }); + + test('fetchUrl returns empty list on 500', async () => { + const mockFetch = global.fetch as unknown as ReturnType; + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: () => Promise.resolve('Internal Server Error'), + }); + const result = await fetchUrl('error-bibcode'); expect(result).toEqual([]); }); }); diff --git a/src/lib/useGetResourceLinks.ts b/src/lib/useGetResourceLinks.ts index 1811f2ab7..6e5a3321a 100644 --- a/src/lib/useGetResourceLinks.ts +++ b/src/lib/useGetResourceLinks.ts @@ -3,7 +3,7 @@ import { isValidURL } from '@/utils/common/isValidURL'; export const resourceUrlTypes = ['arXiv', 'PDF', 'DOI', 'HTML', 'Other'] as const; -export type ResourceUrlType = typeof resourceUrlTypes[number]; +export type ResourceUrlType = (typeof resourceUrlTypes)[number]; export interface IResourceUrl { type: ResourceUrlType; @@ -15,14 +15,6 @@ interface IUseResourceLinksProps { options?: UseQueryOptions; } -// TODO: slightly brittle, since these links could change over time -const SKIP_URLS = [ - 'http://www.cfa.harvard.edu/sao', - 'https://www.cfa.harvard.edu/', - 'http://www.si.edu', - 'http://www.nasa.gov', -]; - const URL_TYPE_MAP: Record = { arxiv: 'arXiv', pdf: 'PDF', @@ -31,14 +23,13 @@ const URL_TYPE_MAP: Record = { }; const RESOURCE_EXT_REGEX = /\.(jpg|jpeg|png|gif|webp|svg|css|js|ico|woff2?|ttf|otf|eot|map|mp4|webm)(\?|$)/i; -const URL_REGX = /href="(https?:\/\/[^"]*)"/gi; /** * Transforms a URL into a structured resource link object. * @param url */ export const transformUrl = (url: string) => { - if (!url || typeof url !== 'string' || !isValidURL(url) || RESOURCE_EXT_REGEX.test(url) || SKIP_URLS.includes(url)) { + if (!url || typeof url !== 'string' || !isValidURL(url) || RESOURCE_EXT_REGEX.test(url)) { return null; } @@ -56,29 +47,37 @@ export const fetchUrl = async (identifier: string): Promise => { const url = `/link_gateway/${encodeURIComponent(identifier)}/ESOURCE`; const res = await fetch(url); - // check for 302 redirects - if (res.status === 302 || res.status === 301) { - const redirectUrl = res.headers.get('Location'); - if (redirectUrl) { - const transformedUrl = transformUrl(redirectUrl); - return transformedUrl ? [transformedUrl] : []; - } + if (!res.ok) { return []; } + // single-link resources redirect directly to the target URL + if (res.redirected) { + const transformedUrl = transformUrl(res.url); + return transformedUrl ? [transformedUrl] : []; + } + const raw = await res.text(); if (!raw) { return []; } - const seen = new Set(); - const result = Array.from(raw.matchAll(URL_REGX), ([, href]) => transformUrl(href)); + const parser = new DOMParser(); + const doc = parser.parseFromString(raw, 'text/html'); + const links = doc.querySelectorAll('.list-group-item a'); + const seen = new Set(); const output: IResourceUrl[] = []; - for (const res of result) { - if (res && !seen.has(res.url)) { - seen.add(res.url); - output.push(res); + + for (const link of links) { + const href = link.getAttribute('href'); + if (!href) { + continue; + } + const transformed = transformUrl(href); + if (transformed && !seen.has(transformed.url)) { + seen.add(transformed.url); + output.push(transformed); } }