diff --git a/src/lib/__tests__/useGetResourceLinks.test.ts b/src/lib/__tests__/useGetResourceLinks.test.ts
index b360e8990..fcb04177a 100644
--- a/src/lib/__tests__/useGetResourceLinks.test.ts
+++ b/src/lib/__tests__/useGetResourceLinks.test.ts
@@ -2,28 +2,51 @@ import { beforeEach, describe, expect, test, vi } from 'vitest';
import { fetchUrl, transformUrl } from '../useGetResourceLinks';
const htmlWithLinks = `
-
-
- arXiv
- DOI
- HTML
- CSS
- NASA
- PDF
- HTML Duplicate
+
+
+
+
+
`;
-const SKIP_URLS = [
- 'http://www.cfa.harvard.edu/sao',
- 'https://www.cfa.harvard.edu/',
- 'http://www.si.edu',
- 'http://www.nasa.gov',
-];
-
const expectedUrls = [
- { type: 'arXiv', url: 'https://arxiv.org/pdf/1234.5678.pdf' },
- { type: 'DOI', url: 'https://doi.org/10.1234/abcd' },
- { type: 'HTML', url: 'https://example.com/page.html' },
+ { type: 'arXiv', url: 'https://arxiv.org/abs/2310.03851' },
+ { type: 'arXiv', url: 'https://arxiv.org/pdf/2310.03851' },
+ { type: 'DOI', url: 'https://doi.org/10.3847/1538-4357/acffbd' },
{ type: 'PDF', url: 'https://example.com/document.pdf' },
];
@@ -38,12 +61,6 @@ describe('resourceLinks', () => {
expect(transformUrl('https://example.com/script.js')).toBeNull();
});
- test('transformUrl filters known skipped domains', () => {
- for (const url of SKIP_URLS) {
- expect(transformUrl(url)).toBeNull();
- }
- });
-
test('transformUrl assigns correct type', () => {
expect(transformUrl('https://arxiv.org/pdf/foo.pdf')).toEqual({
type: 'arXiv',
@@ -81,6 +98,8 @@ describe('resourceLinks', () => {
test('fetchUrl returns deduplicated transformed links', async () => {
const mockFetch = global.fetch as unknown as ReturnType;
mockFetch.mockResolvedValueOnce({
+ ok: true,
+ redirected: false,
text: () => Promise.resolve(htmlWithLinks),
});
@@ -91,7 +110,9 @@ describe('resourceLinks', () => {
test('fetchUrl returns empty list if input has no valid links', async () => {
const mockFetch = global.fetch as unknown as ReturnType;
mockFetch.mockResolvedValueOnce({
- text: () => Promise.resolve('No links here
'),
+ ok: true,
+ redirected: false,
+ text: () => Promise.resolve(''),
});
const result = await fetchUrl('fake-id');
@@ -104,14 +125,14 @@ describe('Redirected response', () => {
vi.resetAllMocks();
global.fetch = vi.fn();
});
- test('fetchUrl handles 302 redirect and uses Location header', async () => {
+
+ test('fetchUrl detects browser-followed redirect via res.redirected', async () => {
const mockFetch = global.fetch as unknown as ReturnType;
mockFetch.mockResolvedValueOnce({
- status: 302,
- headers: {
- get: (name: string) => (name === 'Location' ? 'https://doi.org/10.1234/foo' : null),
- },
- text: () => Promise.resolve(''), // not used in redirect
+ ok: true,
+ redirected: true,
+ url: 'https://doi.org/10.1234/foo',
+ text: () => Promise.resolve(''),
});
const result = await fetchUrl('test-id');
@@ -124,18 +145,51 @@ describe('Redirected response', () => {
]);
});
- test('fetchUrl returns empty if redirect has no Location', async () => {
+ test('fetchUrl returns empty if redirected URL is not valid', async () => {
const mockFetch = global.fetch as unknown as ReturnType;
mockFetch.mockResolvedValueOnce({
- status: 302,
- headers: {
- get: (() => null) as (name: string) => string | null,
- },
+ ok: true,
+ redirected: true,
+ url: '',
text: () => Promise.resolve(''),
});
const result = await fetchUrl('test-id');
+ expect(result).toEqual([]);
+ });
+});
+
+describe('Error responses', () => {
+ beforeEach(() => {
+ vi.resetAllMocks();
+ global.fetch = vi.fn(); // install a fresh fetch mock before every test
+ });
+
+ test('fetchUrl returns empty list on 404', async () => {
+ const mockFetch = global.fetch as unknown as ReturnType;
+ mockFetch.mockResolvedValueOnce({
+ ok: false, // fetchUrl returns [] as soon as res.ok is false
+ status: 404,
+ text: () =>
+ Promise.resolve(
+ 'The requested resource does not exist
' +
+ '',
+ ),
+ });
+
+ const result = await fetchUrl('bad-bibcode');
+ expect(result).toEqual([]);
+ });
+
+ test('fetchUrl returns empty list on 500', async () => {
+ const mockFetch = global.fetch as unknown as ReturnType;
+ mockFetch.mockResolvedValueOnce({
+ ok: false, // same early-return path as the 404 case
+ status: 500,
+ text: () => Promise.resolve('Internal Server Error'), // never read: fetchUrl returns before calling res.text()
+ });
+ const result = await fetchUrl('error-bibcode');
expect(result).toEqual([]);
});
});
diff --git a/src/lib/useGetResourceLinks.ts b/src/lib/useGetResourceLinks.ts
index 1811f2ab7..6e5a3321a 100644
--- a/src/lib/useGetResourceLinks.ts
+++ b/src/lib/useGetResourceLinks.ts
@@ -3,7 +3,7 @@ import { isValidURL } from '@/utils/common/isValidURL';
export const resourceUrlTypes = ['arXiv', 'PDF', 'DOI', 'HTML', 'Other'] as const;
-export type ResourceUrlType = typeof resourceUrlTypes[number];
+export type ResourceUrlType = (typeof resourceUrlTypes)[number];
export interface IResourceUrl {
type: ResourceUrlType;
@@ -15,14 +15,6 @@ interface IUseResourceLinksProps {
options?: UseQueryOptions;
}
-// TODO: slightly brittle, since these links could change over time
-const SKIP_URLS = [
- 'http://www.cfa.harvard.edu/sao',
- 'https://www.cfa.harvard.edu/',
- 'http://www.si.edu',
- 'http://www.nasa.gov',
-];
-
const URL_TYPE_MAP: Record = {
arxiv: 'arXiv',
pdf: 'PDF',
@@ -31,14 +23,13 @@ const URL_TYPE_MAP: Record = {
};
const RESOURCE_EXT_REGEX = /\.(jpg|jpeg|png|gif|webp|svg|css|js|ico|woff2?|ttf|otf|eot|map|mp4|webm)(\?|$)/i;
-const URL_REGX = /href="(https?:\/\/[^"]*)"/gi;
/**
* Transforms a URL into a structured resource link object.
* @param url
*/
export const transformUrl = (url: string) => {
- if (!url || typeof url !== 'string' || !isValidURL(url) || RESOURCE_EXT_REGEX.test(url) || SKIP_URLS.includes(url)) {
+ if (!url || typeof url !== 'string' || !isValidURL(url) || RESOURCE_EXT_REGEX.test(url)) {
return null;
}
@@ -56,29 +47,37 @@ export const fetchUrl = async (identifier: string): Promise => {
const url = `/link_gateway/${encodeURIComponent(identifier)}/ESOURCE`;
const res = await fetch(url);
- // check for 302 redirects
- if (res.status === 302 || res.status === 301) {
- const redirectUrl = res.headers.get('Location');
- if (redirectUrl) {
- const transformedUrl = transformUrl(redirectUrl);
- return transformedUrl ? [transformedUrl] : [];
- }
+ if (!res.ok) { // non-OK status (the tests cover 404 and 500): report no links instead of parsing the error body
return [];
}
+ // single-link resources redirect directly to the target URL; fetch has already followed the redirect, so res.url is the final location
+ if (res.redirected) {
+ const transformedUrl = transformUrl(res.url);
+ return transformedUrl ? [transformedUrl] : []; // a final URL that transformUrl rejects yields an empty list
+ }
+
const raw = await res.text();
if (!raw) {
return [];
}
- const seen = new Set();
- const result = Array.from(raw.matchAll(URL_REGX), ([, href]) => transformUrl(href));
+ const parser = new DOMParser();
+ const doc = parser.parseFromString(raw, 'text/html');
+ const links = doc.querySelectorAll('.list-group-item a'); // only anchors nested under a .list-group-item element are considered
+ const seen = new Set(); // URLs already pushed to output, used to drop duplicates
const output: IResourceUrl[] = [];
- for (const res of result) {
- if (res && !seen.has(res.url)) {
- seen.add(res.url);
- output.push(res);
+
+ for (const link of links) {
+ const href = link.getAttribute('href');
+ if (!href) {
+ continue; // anchor has no href attribute, or an empty one
+ }
+ const transformed = transformUrl(href);
+ if (transformed && !seen.has(transformed.url)) { // transformUrl returns null for URLs it rejects; keep only the first occurrence of each URL
+ seen.add(transformed.url);
+ output.push(transformed);
}
}