diff --git a/src/clis/xiaohongshu/user-helpers.test.ts b/src/clis/xiaohongshu/user-helpers.test.ts new file mode 100644 index 0000000..f990110 --- /dev/null +++ b/src/clis/xiaohongshu/user-helpers.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from 'vitest'; +import { + buildXhsNoteUrl, + extractXhsUserNotes, + flattenXhsNoteGroups, + normalizeXhsUserId, +} from './user-helpers.js'; + +describe('normalizeXhsUserId', () => { + it('extracts the profile id from a full Xiaohongshu URL', () => { + expect( + normalizeXhsUserId( + 'https://www.xiaohongshu.com/user/profile/615529370000000002026001?xsec_source=pc_search' + ) + ).toBe('615529370000000002026001'); + }); + + it('keeps a bare profile id unchanged', () => { + expect(normalizeXhsUserId('615529370000000002026001')).toBe('615529370000000002026001'); + }); +}); + +describe('flattenXhsNoteGroups', () => { + it('flattens grouped note arrays and ignores empty groups', () => { + expect(flattenXhsNoteGroups([[{ id: 'a' }], [], null, [{ id: 'b' }]])).toEqual([ + { id: 'a' }, + { id: 'b' }, + ]); + }); +}); + +describe('buildXhsNoteUrl', () => { + it('includes xsec token when available', () => { + expect(buildXhsNoteUrl('user123', 'note456', 'token789')).toBe( + 'https://www.xiaohongshu.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user' + ); + }); +}); + +describe('extractXhsUserNotes', () => { + it('normalizes grouped note cards into CLI rows', () => { + const rows = extractXhsUserNotes( + { + noteGroups: [ + [ + { + id: 'note-1', + xsecToken: 'abc', + noteCard: { + noteId: 'note-1', + displayTitle: 'First note', + type: 'video', + interactInfo: { likedCount: '4.6万' }, + user: { userId: 'user-1' }, + }, + }, + { + noteCard: { + note_id: 'note-2', + display_title: 'Second note', + type: 'normal', + interact_info: { liked_count: 42 }, + }, + }, + ], + [], + ], + }, + 'fallback-user' + ); + + expect(rows).toEqual([ + { + id: 'note-1', + title: 'First note', + type: 'video', + likes: '4.6万', + url: 'https://www.xiaohongshu.com/user/profile/user-1/note-1?xsec_token=abc&xsec_source=pc_user', + }, + { + id: 'note-2', + title: 'Second note', + type: 'normal', + likes: '42', + url: 'https://www.xiaohongshu.com/user/profile/fallback-user/note-2', + }, + ]); + }); + + it('deduplicates repeated notes by note id', () => { + const rows = extractXhsUserNotes( + { + noteGroups: [ + [ + { noteCard: { noteId: 'dup-1', displayTitle: 'keep me' } }, + { noteCard: { noteId: 'dup-1', displayTitle: 'drop me' } }, + ], + ], + }, + 'fallback-user' + ); + + expect(rows).toHaveLength(1); + expect(rows[0]?.title).toBe('keep me'); + }); +}); diff --git a/src/clis/xiaohongshu/user-helpers.ts b/src/clis/xiaohongshu/user-helpers.ts new file mode 100644 index 0000000..b5ddb9c --- /dev/null +++ b/src/clis/xiaohongshu/user-helpers.ts @@ -0,0 +1,85 @@ +export interface XhsUserPageSnapshot { + noteGroups?: unknown; + pageData?: unknown; +} + +export interface XhsUserNoteRow { + id: string; + title: string; + type: string; + likes: string; + url: string; +} + +function toCleanString(value: unknown): string { + return typeof value === 'string' ? value.trim() : value == null ? '' : String(value).trim(); +} + +export function normalizeXhsUserId(input: string): string { + const trimmed = toCleanString(input); + const withoutQuery = trimmed.replace(/[?#].*$/, ''); + const matched = withoutQuery.match(/\/user\/profile\/([a-zA-Z0-9]+)/); + if (matched?.[1]) return matched[1]; + return withoutQuery.replace(/\/+$/, '').split('/').pop() ?? withoutQuery; +} + +export function flattenXhsNoteGroups(noteGroups: unknown): any[] { + if (!Array.isArray(noteGroups)) return []; + + const notes: any[] = []; + for (const group of noteGroups) { + if (!group) continue; + if (Array.isArray(group)) { + for (const item of group) { + if (item) notes.push(item); + } + continue; + } + notes.push(group); + } + + return notes; +} + +export function buildXhsNoteUrl(userId: string, noteId: string, xsecToken?: string): string { + const cleanUserId = toCleanString(userId); + const cleanNoteId = toCleanString(noteId); + if (!cleanUserId || !cleanNoteId) return ''; + + const url = new URL(`https://www.xiaohongshu.com/user/profile/${cleanUserId}/${cleanNoteId}`); + const cleanToken = toCleanString(xsecToken); + if (cleanToken) { + url.searchParams.set('xsec_token', cleanToken); + url.searchParams.set('xsec_source', 'pc_user'); + } + return url.toString(); +} + +export function extractXhsUserNotes(snapshot: XhsUserPageSnapshot, fallbackUserId: string): XhsUserNoteRow[] { + const notes = flattenXhsNoteGroups(snapshot.noteGroups); + const rows: XhsUserNoteRow[] = []; + const seen = new Set(); + + for (const entry of notes) { + const noteCard = entry?.noteCard ?? entry?.note_card ?? entry; + if (!noteCard || typeof noteCard !== 'object') continue; + + const noteId = toCleanString(noteCard.noteId ?? noteCard.note_id ?? entry?.noteId ?? entry?.note_id ?? entry?.id); + if (!noteId || seen.has(noteId)) continue; + seen.add(noteId); + + const userId = toCleanString(noteCard.user?.userId ?? noteCard.user?.user_id ?? fallbackUserId); + const xsecToken = toCleanString(entry?.xsecToken ?? entry?.xsec_token ?? noteCard.xsecToken ?? noteCard.xsec_token); + const likes = toCleanString(noteCard.interactInfo?.likedCount ?? noteCard.interact_info?.liked_count ?? 0) || '0'; + + rows.push({ + id: noteId, + title: toCleanString(noteCard.displayTitle ?? noteCard.display_title ?? noteCard.title), + type: toCleanString(noteCard.type), + likes, + url: buildXhsNoteUrl(userId || fallbackUserId, noteId, xsecToken), + }); + } + + return rows; +} diff --git a/src/clis/xiaohongshu/user.ts b/src/clis/xiaohongshu/user.ts index 1e73614..97d6517 100644 --- a/src/clis/xiaohongshu/user.ts +++ b/src/clis/xiaohongshu/user.ts @@ -1,45 +1,65 @@ import { cli, Strategy } from '../../registry.js'; +import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js'; + +async function readUserSnapshot(page: any) { + return await page.evaluate(` + (() => { + const safeClone = (value) => { + try { + return JSON.parse(JSON.stringify(value ?? null)); + } catch { + return null; + } + }; + + const userStore = window.__INITIAL_STATE__?.user || {}; + return { + noteGroups: safeClone(userStore.notes?._value || userStore.notes || []), + pageData: safeClone(userStore.userPageData?._value || userStore.userPageData || {}), + }; + })() + `); +} cli({ site: 'xiaohongshu', name: 'user', - description: 'Get user notes from Xiaohongshu', - domain: 'xiaohongshu.com', - strategy: Strategy.INTERCEPT, + description: 'Get public notes from a Xiaohongshu user profile', + domain: 'www.xiaohongshu.com', + strategy: Strategy.COOKIE, browser: true, args: [ - { name: 'id', type: 'string', required: true }, - { name: 'limit', type: 'int', default: 15 }, + { name: 'id', type: 'string', required: true, help: 'User id or profile URL' }, + { name: 'limit', type: 'int', default: 15, help: 'Number of notes to return' }, ], columns: ['id', 'title', 'type', 'likes', 'url'], func: async (page, kwargs) => { - await page.installInterceptor('v1/user/posted'); - - await page.goto(`https://www.xiaohongshu.com/user/profile/${kwargs.id}`); - await page.wait(5); - - // Trigger API by scrolling - await page.autoScroll({ times: 2, delayMs: 2000 }); - - // Retrieve data - const requests = await page.getInterceptedRequests(); - if (!requests || requests.length === 0) return []; - - let results: any[] = []; - for (const req of requests) { - if (req.data && req.data.data && req.data.data.notes) { - for (const note of req.data.data.notes) { - results.push({ - id: note.note_id || note.id, - title: note.display_title || '', - type: note.type || '', - likes: note.interact_info?.liked_count || '0', - url: `https://www.xiaohongshu.com/explore/${note.note_id || note.id}` - }); - } - } + const userId = normalizeXhsUserId(String(kwargs.id)); + const limit = Math.max(1, Number(kwargs.limit ?? 15)); + + await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`); + await page.wait(3); + + let snapshot = await readUserSnapshot(page); + let results = extractXhsUserNotes(snapshot ?? {}, userId); + let previousCount = results.length; + + for (let i = 0; results.length < limit && i < 4; i += 1) { + await page.autoScroll({ times: 1, delayMs: 1500 }); + await page.wait(1); + + snapshot = await readUserSnapshot(page); + const nextResults = extractXhsUserNotes(snapshot ?? {}, userId); + if (nextResults.length <= previousCount) break; + + results = nextResults; + previousCount = nextResults.length; + } + + if (results.length === 0) { + throw new Error('No public notes found for this Xiaohongshu user.'); } - return results.slice(0, kwargs.limit); - } + return results.slice(0, limit); + }, });