-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl-by-cursor.template.ts
83 lines (68 loc) · 1.61 KB
/
crawl-by-cursor.template.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// https://readhub.cn/topics
import axios from 'axios';
import {Crawler, Page} from ".";
/**
 * Query parameters sent with every page request.
 */
type PageParam = {
  /**
   * Cursor for the page to fetch: the `order` of the last item of the
   * previous page. `null` (or absent) on the very first request, which is
   * why `null` is part of the type.
   */
  lastCursor?: number | null;
  /** Page size passed along with each request. */
  pageSize: number;
};
/**
 * A single record inside one page of results.
 */
type PageItem = {
  /** Identifier of the record; sent as `topicId` when its detail is requested. */
  id: string;
  /** Title of the record. */
  title: string;
  /** Ordering value; the last item's `order` becomes the next page's cursor. */
  order: number;
  /** Whether an instant view exists; the detail is only requested when true. */
  hasInstantView: boolean;
};
/**
 * Page-level fields that accompany every page of results.
 */
type PageExtra = {
  /** Page size reported with the page. */
  pageSize: number;
  /** Total item count reported with the page. */
  totalItems: number;
  /** Total page count reported with the page. */
  totalPages: number;
};
/**
 * Detail of a single record, as produced by the crawler's detail lookup.
 */
type Detail = {
  /** URL associated with the record. */
  url: string;
  /** Title of the record. */
  title: string;
};
// Build the crawler: cursor-based paging over the topic list, plus a
// per-item detail lookup.
const crawler = new Crawler<PageParam, PageItem, PageExtra, Detail>()
  // Parameters for the first page request (no cursor yet).
  .pageParam({lastCursor: null, pageSize: 20})
  // Derive the parameters of the next page request from the page just fetched.
  .nextPageParamFunc(({pageParams, prevPage}) => {
    const items = prevPage && prevPage.data;
    if (!items || !items.length) {
      // Nothing came back, so there is no further page to request.
      return null;
    }
    // The cursor for the next page is the `order` of the last item received.
    const lastItem = items[items.length - 1];
    return {...pageParams, lastCursor: lastItem.order};
  })
  // Fetch one page of topics; the page parameters are sent as query params.
  .pageFunc(params =>
    axios
      .get<Page<PageItem, PageExtra>>('https://api.readhub.cn/topic', {params})
      .then(({data}) => data)
  )
  // Fetch the detail of a single item.
  .detailFunc(({item}) => {
    if (!item.hasInstantView) {
      // No instant view for this item, so no request is made.
      // NOTE(review): presumably Crawler treats a null detail as "skip" — confirm.
      return null;
    }
    // Typing the response as Detail keeps `any` from leaking out of the
    // response body and lets the compiler check it against the crawler's
    // declared detail type.
    return axios
      .get<Detail>('https://api.readhub.cn/topic/instantview', {
        params: {topicId: item.id},
      })
      .then(({data}) => data);
  });
// Entry point: iterate over everything the crawler yields and print each
// detail object as it arrives.
void (async function run(): Promise<void> {
  const details = crawler.all();
  for await (const detail of details) {
    // Handle the detail object.
    console.log(detail);
  }
})();