feat(douban): add movie adapter with search, top250, subject, marks, reviews commands (#239)

ykfnxx · jackwener · web-flow · commit 70651d3ba862 · 2026-03-22T17:57:34.000+08:00
* feat(douban): add movie adapter with search, top250, subject, marks, reviews commands

- search: search movies by keyword
- top250: get top 250 movies
- subject: get movie details by id
- marks: export personal viewing marks
- reviews: export personal movie reviews

* review: resolve douban adapter blockers

---------

Co-authored-by: jackwener <jakevingoo@gmail.com>
diff --git a/README.md b/README.md
@@ -161,6 +161,7 @@ Run `opencli list` for the live registry.
 | **stackoverflow** | `hot` `search` `bounties` `unanswered` | Public |
 | **steam** | `top-sellers` | Public |
 | **weread** | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | Browser |
+| **douban** | `search` `top250` `subject` `marks` `reviews` | Browser |
 
 > **Bloomberg note**: The RSS-backed Bloomberg listing commands (`main`, section feeds, `feeds`) work without a browser. `bloomberg news` is for standard Bloomberg story/article pages that your current Chrome session can already access. Audio and some other non-standard pages may fail, and OpenCLI does not bypass Bloomberg paywall or entitlement checks.
 
diff --git a/README.zh-CN.md b/README.zh-CN.md
@@ -162,6 +162,7 @@ npm install -g @jackwener/opencli@latest
 | **stackoverflow** | `hot` `search` `bounties` `unanswered` | 公开 |
 | **steam** | `top-sellers` | 公开 |
 | **weread** | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | 浏览器 |
+| **douban** | `search` `top250` `subject` `marks` `reviews` | 浏览器 |
 
 > **Bloomberg 说明**：Bloomberg 的 RSS 列表命令（`main`、各栏目 feed、`feeds`）无需浏览器即可使用。`bloomberg news` 适用于当前 Chrome 会话本身就能访问的标准 Bloomberg 文章页。音频页和部分非标准页面可能失败，OpenCLI 也不会绕过 Bloomberg 的付费墙、登录或权限校验。
 
diff --git a/src/clis/douban/marks.ts b/src/clis/douban/marks.ts
@@ -0,0 +1,135 @@
+import { cli, Strategy } from '../../registry.js';
+import type { IPage } from '../../types.js';
+import { DoubanMark, getSelfUid } from './utils.js';
+
+// Registers the `douban marks` command. It exports a user's movie marks for
+// one status, or for collect, wish and do in that order when status is `all`.
+// A positive `limit` is a single budget shared across all crawled statuses;
+// a limit of 0 (or less) exports everything.
+cli({
+  site: 'douban',
+  name: 'marks',
+  description: '导出个人观影标记',
+  domain: 'movie.douban.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    {
+      name: 'status',
+      default: 'collect',
+      choices: ['collect', 'wish', 'do', 'all'],
+      help: '标记类型: collect(看过), wish(想看), do(在看), all(全部)',
+    },
+    { name: 'limit', type: 'int', default: 50, help: '导出数量， 0 表示全部' },
+    { name: 'uid', help: '用户ID，不填则使用当前登录账号' },
+  ],
+  columns: ['title', 'year', 'myRating', 'myStatus', 'myDate', 'myComment', 'url'],
+  func: async (page: IPage, kwargs: { status?: string; limit?: number; uid?: string }) => {
+    const { uid: providedUid, status = 'collect', limit = 50 } = kwargs;
+    const capped = limit > 0;
+
+    // Fall back to the logged-in account when no uid was supplied.
+    const uid = providedUid || (await getSelfUid(page));
+
+    const wanted = status === 'all' ? ['collect', 'wish', 'do'] : [status];
+    const collected: DoubanMark[] = [];
+
+    for (const kind of wanted) {
+      // Budget left for this status; 0 is passed through to mean "no cap".
+      const budget = capped ? limit - collected.length : 0;
+      if (capped && budget <= 0) {
+        break;
+      }
+
+      const batch = await fetchMarks(page, uid, kind, budget);
+      collected.push(...batch);
+    }
+
+    if (capped) {
+      return collected.slice(0, limit);
+    }
+    return collected.slice();
+  },
+});
+
+/**
+ * Scrapes one status listing of a user's Douban movie marks, page by page.
+ *
+ * Paging does not rely on a fixed page size: the `start` offset advances by
+ * the number of entries actually parsed from the previous page, and the crawl
+ * ends on the first page that is empty or contributes nothing new. Entries are
+ * de-duplicated by `url`, so any overlap between consecutive pages is harmless.
+ * The price is one extra request at the end of a listing.
+ *
+ * NOTE(review): the original code assumed 30 entries per page while requesting
+ * `mode=grid`; grid mode is believed to serve 15 per page, which would have
+ * stopped the crawl after the first page. Confirm against the live site.
+ *
+ * @param page   Browser page used for navigation and in-page evaluation.
+ * @param uid    Douban user id whose marks are read.
+ * @param status Listing segment to read; also stamped on each mark as `myStatus`.
+ * @param limit  Maximum number of marks to return; 0 or less means no cap.
+ * @returns Marks in listing order, at most `limit` of them when `limit` is positive.
+ */
+async function fetchMarks(
+  page: IPage,
+  uid: string,
+  status: string,
+  limit: number
+): Promise<DoubanMark[]> {
+  const marks: DoubanMark[] = [];
+  const seenUrls = new Set<string>();
+  let offset = 0;
+
+  // `uid` can come straight from the command line, so escape both values
+  // before they are spliced into the URL path or the evaluated script.
+  const listingBase = `https://movie.douban.com/people/${encodeURIComponent(uid)}/${encodeURIComponent(status)}`;
+  const statusLiteral = JSON.stringify(status);
+
+  while (true) {
+    const url = `${listingBase}?start=${offset}&sort=time&rating=all&filter=all&mode=grid`;
+
+    await page.goto(url);
+
+    // Fixed pause after navigation before reading the DOM.
+    await page.wait({ time: 2 });
+
+    const pageMarks = await page.evaluate(`
+      () => {
+        const results = [];
+
+        const items = document.querySelectorAll('.item');
+
+        items.forEach(item => {
+          const titleLink = item.querySelector('.info a[href*="/subject/"]');
+          if (!titleLink) return;
+
+          const titleEl = titleLink.querySelector('em');
+          const titleText = titleEl?.textContent?.trim() || titleLink.textContent?.trim() || '';
+          const title = titleText.split('/')[0].trim();
+          const href = titleLink.href || '';
+
+          const idMatch = href.match(/subject\\/(\\d+)/);
+          const movieId = idMatch ? idMatch[1] : '';
+
+          if (!movieId || !title) return;
+
+          const ratingSpan = item.querySelector('span[class*="rating"]');
+          let myRating = null;
+          if (ratingSpan) {
+            const cls = ratingSpan.className || '';
+            const ratingMatch = cls.match(/rating(\\d)-t/);
+            if (ratingMatch) {
+              myRating = parseInt(ratingMatch[1], 10) * 2;
+            }
+          }
+
+          const dateSpan = item.querySelector('.date');
+          const myDate = dateSpan?.textContent?.trim() || '';
+
+          const commentSpan = item.querySelector('.comment');
+          const myComment = commentSpan?.textContent?.trim() || '';
+
+          const introSpan = item.querySelector('.intro');
+          let year = '';
+          if (introSpan) {
+            const introText = introSpan.textContent || '';
+            const yearMatch = introText.match(/(\\d{4})/);
+            year = yearMatch ? yearMatch[1] : '';
+          }
+
+          results.push({
+            movieId,
+            title,
+            year,
+            myRating,
+            myStatus: ${statusLiteral},
+            myComment,
+            myDate,
+            url: href || 'https://movie.douban.com/subject/' + movieId
+          });
+        });
+
+        return results;
+      }
+    `) as DoubanMark[];
+
+    if (!pageMarks || pageMarks.length === 0) break;
+
+    // Keep only entries not seen on an earlier page.
+    let added = 0;
+    for (const mark of pageMarks) {
+      if (seenUrls.has(mark.url)) continue;
+      seenUrls.add(mark.url);
+      marks.push(mark);
+      added += 1;
+    }
+
+    // A page with nothing new means the listing is exhausted (or the site is
+    // repeating itself); stopping here also rules out an endless loop.
+    if (added === 0) break;
+    if (limit > 0 && marks.length >= limit) break;
+
+    offset += pageMarks.length;
+
+    // Throttle between listing pages.
+    await new Promise(resolve => setTimeout(resolve, 1000));
+  }
+
+  return limit > 0 ? marks.slice(0, limit) : marks;
+}
diff --git a/src/clis/douban/reviews.ts b/src/clis/douban/reviews.ts
@@ -0,0 +1,127 @@
+import { cli, Strategy } from '../../registry.js';
+import type { IPage } from '../../types.js';
+import { getSelfUid, DoubanReview } from './utils.js';
+
+// Registers the `douban reviews` command. It exports a user's movie reviews,
+// optionally replacing each listing summary with the full review text.
+cli({
+  site: 'douban',
+  name: 'reviews',
+  description: '导出个人影评',
+  domain: 'movie.douban.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'limit', type: 'int', default: 20, help: '导出数量' },
+    { name: 'uid', help: '用户ID，不填则使用当前登录账号' },
+    { name: 'full', type: 'bool', default: false, help: '获取完整影评内容' },
+  ],
+  columns: ['movieTitle', 'title', 'myRating', 'votes', 'content', 'url'],
+  func: async (page: IPage, kwargs: { limit?: number; uid?: string; full?: boolean }) => {
+    const { uid: providedUid, limit = 20, full = false } = kwargs;
+
+    // Fall back to the logged-in account when no uid was supplied.
+    const targetUid = providedUid || (await getSelfUid(page));
+
+    return await fetchReviews(page, targetUid, limit, full);
+  },
+});
+
+/**
+ * Scrapes a user's Douban movie review listing, page by page.
+ *
+ * @param page  Browser page used for navigation and in-page evaluation.
+ * @param uid   Douban user id whose reviews are read.
+ * @param limit Maximum number of reviews to return; 0 or less means no cap.
+ * @param full  When true, each returned review's `content` is replaced by the
+ *              text of its own review page. The listing summary is kept when
+ *              that page yields no text.
+ * @returns Reviews in listing order, at most `limit` of them when `limit` is positive.
+ */
+async function fetchReviews(
+  page: IPage,
+  uid: string,
+  limit: number,
+  full: boolean,
+): Promise<DoubanReview[]> {
+  const reviews: DoubanReview[] = [];
+  let start = 0;
+  // NOTE(review): assumes the listing serves 20 reviews per page; if the real
+  // page size is smaller the crawl stops after the first page. Confirm.
+  const pageSize = 20;
+
+  // `uid` can come straight from the command line; escape it for the URL path.
+  const listingBase = `https://movie.douban.com/people/${encodeURIComponent(uid)}/reviews`;
+
+  while (true) {
+    const url = `${listingBase}?start=${start}&sort=time`;
+
+    await page.goto(url);
+
+    // Fixed pause after navigation before reading the DOM.
+    await page.wait({ time: 1 });
+
+    const data = await page.evaluate(`
+      () => {
+        const reviews = [];
+
+        document.querySelectorAll('.tlst').forEach(el => {
+          const movieLinkEl = el.querySelector('.ilst a');
+          const reviewTitleEl = el.querySelector('.nlst a[title]');
+          const ratingEl = el.querySelector('.clst span[class*="allstar"]');
+          const contentEl = el.querySelector('.review-short span');
+          const votesEl = el.querySelector('.review-short .pl span');
+
+          const movieHref = movieLinkEl?.href || '';
+          const movieId = movieHref.match(/subject\\/(\\d+)/)?.[1] || '';
+          const movieTitle = movieLinkEl?.getAttribute('title') || movieLinkEl?.textContent?.trim() || '';
+
+          const reviewHref = reviewTitleEl?.href || '';
+          const reviewId = reviewHref.match(/reviews\\/(\\d+)/)?.[1] || '';
+          const title = reviewTitleEl?.textContent?.trim() || '';
+
+          let myRating = 0;
+          if (ratingEl) {
+            const cls = ratingEl.className || '';
+            const ratingMatch = cls.match(/allstar(\\d)0/);
+            if (ratingMatch) {
+              myRating = parseInt(ratingMatch[1], 10) * 2;
+            }
+          }
+
+          const votesText = votesEl?.textContent || '';
+          const votesMatch = votesText.match(/(\\d+)/);
+          const votes = votesMatch ? parseInt(votesMatch[1], 10) : 0;
+
+          reviews.push({
+            reviewId,
+            movieId,
+            movieTitle,
+            title,
+            content: contentEl?.textContent?.trim() || '',
+            myRating,
+            createdAt: '',
+            votes,
+            url: reviewHref,
+          });
+        });
+
+        return reviews;
+      }
+    `) as DoubanReview[];
+
+    // The evaluation may yield nothing (for example on a failed load); treat
+    // that as the end of the listing instead of spreading a non-array.
+    if (!data || data.length === 0) break;
+
+    reviews.push(...data);
+
+    if (data.length < pageSize) break;
+    if (limit > 0 && reviews.length >= limit) break;
+
+    start += pageSize;
+  }
+
+  const result = reviews.slice(0, limit > 0 ? limit : undefined);
+
+  if (full) {
+    for (const review of result) {
+      if (!review.url) continue;
+
+      const fullContent = await fetchFullReview(page, review.url);
+      // Keep the listing summary rather than blanking the field when the
+      // review page produced no text.
+      if (fullContent) {
+        review.content = fullContent;
+      }
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Navigates to a single review page and returns the trimmed text of its
+ * `.review-content` element, or an empty string when that element is absent
+ * or has no text.
+ */
+async function fetchFullReview(page: IPage, reviewUrl: string): Promise<string> {
+  const extractBody = `
+    () => {
+      const contentEl = document.querySelector('.review-content');
+      return contentEl?.textContent?.trim() || '';
+    }
+  `;
+
+  await page.goto(reviewUrl);
+  // Fixed pause after navigation before reading the DOM.
+  await page.wait({ time: 1 });
+
+  return (await page.evaluate(extractBody)) as string;
+}
diff --git a/src/clis/douban/subject.yaml b/src/clis/douban/subject.yaml
@@ -0,0 +1,76 @@
+# `douban subject <id>`: opens a movie's subject page and returns one row of
+# details read from the page's DOM.
+site: douban
+name: subject
+description: 获取电影详情
+domain: movie.douban.com
+strategy: cookie
+browser: true
+
+args:
+  id:
+    positional: true
+    required: true
+    type: str
+    description: 电影 ID
+
+pipeline:
+  - navigate: https://movie.douban.com/subject/${{ args.id }}
+
+  - evaluate: |
+      (async () => {
+        // NOTE(review): args.id is spliced verbatim into this string literal
+        // (and into the navigate URL above); an id containing a quote would
+        // break or alter the script. Consider validating that it is numeric.
+        const id = '${{ args.id }}';
+
+        // Fixed pause so the page can finish rendering before it is read
+        await new Promise(r => setTimeout(r, 2000));
+
+        // Trimmed text of the first element matching a selector, '' if none
+        const textOf = (selector) => document.querySelector(selector)?.textContent?.trim() || '';
+        // Non-empty trimmed texts of every element matching a selector
+        const textsOf = (selector) => Array.from(document.querySelectorAll(selector))
+          .map(el => el.textContent?.trim())
+          .filter(Boolean);
+
+        const title = textOf('span[property="v:itemreviewed"]');
+        const originalTitle = textOf('span[property="v:originalTitle"]');
+
+        // Keep only the 4-digit year so the value matches the bare year the
+        // marks command emits; fall back to the raw text when none is found
+        const yearText = textOf('.year');
+        const yearMatch = yearText.match(/[0-9]{4}/);
+        const year = yearMatch ? yearMatch[0] : yearText;
+
+        // Missing or non-numeric rating text becomes 0 instead of NaN
+        const ratingValue = parseFloat(textOf('strong[property="v:average"]'));
+        const rating = Number.isFinite(ratingValue) ? ratingValue : 0;
+
+        const ratingCountValue = parseInt(textOf('span[property="v:votes"]'), 10);
+        const ratingCount = Number.isFinite(ratingCountValue) ? ratingCountValue : 0;
+
+        const genres = textsOf('span[property="v:genre"]').join(',');
+        const directors = textsOf('a[rel="v:directedBy"]').join(',');
+
+        // Drop empty names before taking the first five, so blank entries do
+        // not use up slots
+        const casts = textsOf('a[rel="v:starring"]').slice(0, 5).join(',');
+
+        const summary = textOf('span[property="v:summary"]');
+
+        return [{
+          id,
+          title,
+          originalTitle,
+          year,
+          rating,
+          ratingCount,
+          genres,
+          directors,
+          casts,
+          // Summary is capped at 200 characters
+          summary: summary.substring(0, 200),
+          url: `https://movie.douban.com/subject/${id}`
+        }];
+      })()
+
+columns: [id, title, originalTitle, year, rating, ratingCount, genres, directors, casts, summary, url]
diff --git a/src/clis/douban/top250.yaml b/src/clis/douban/top250.yaml
diff --git a/src/clis/douban/utils.ts b/src/clis/douban/utils.ts