// request.ts
import http, {
RequestOptions,
IncomingMessage,
ClientRequest,
IncomingHttpHeaders
} from 'node:http'
import https from 'node:https'
import Url from 'node:url'
import querystring from 'node:querystring'
import HttpsProxyAgent from 'https-proxy-agent'
import { isObject, isUndefined } from './utils'
import { AnyObject } from './types/common'
import { LoaderCrawlDataDetail, LoaderCrawlFileDetail } from './api'
/* Type */
/**
 * Value the `request` function resolves with once a response has been fully
 * received.
 */
export interface Request {
// Status code copied from the Node response object; Node types it as possibly undefined.
statusCode: number | undefined
// Response headers copied from the Node response object.
headers: IncomingHttpHeaders
// Whole response body: every received chunk concatenated into a single Buffer.
data: Buffer
}
/**
 * Intermediate description of one outgoing request, produced by
 * `createContentConfig` and consumed by `request`.
 */
interface ContentConfig {
// Protocol parsed from the target URL; `request` uses it to pick `http.request` or `https.request`.
protocol: 'http:' | 'https:'
// Request body as a string (object bodies are JSON-serialized), or undefined when there is no body.
data: string | undefined
// Options object handed to `http.request` / `https.request`.
requestConfig: RequestOptions
}
/**
 * Builds the header map for an outgoing request and stores it on
 * `contentConfig.requestConfig.headers` (the passed `contentConfig` is mutated).
 *
 * The result always starts from a default browser-like `User-Agent`, which the
 * caller's own headers may override. When the request has a body
 * (`contentConfig.data` is not undefined), `Content-Type: application/json`
 * and a byte-accurate `Content-Length` are added unless the caller already
 * supplied those headers.
 *
 * @param rawRequestConfig - Caller-provided request description; only its
 *   optional `headers` map is read here.
 * @param contentConfig - Prepared request; `data` is read and
 *   `requestConfig.headers` is overwritten.
 */
function parseHeaders(
  rawRequestConfig: LoaderCrawlDataDetail & LoaderCrawlFileDetail,
  contentConfig: ContentConfig
) {
  const rawHeaders = rawRequestConfig.headers ?? {}
  const { requestConfig, data } = contentConfig

  const headers: AnyObject = {
    'User-Agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    ...rawHeaders
  }

  if (data !== undefined) {
    // HTTP header names are case-insensitive, so a caller-supplied
    // `content-type` must count as "Content-Type was provided". Comparing the
    // exact spelling would add the default under a second spelling and let it
    // replace the caller's value when the headers are applied.
    const suppliedNames = new Set(
      Object.keys(rawHeaders)
        .filter((name) => rawHeaders[name] !== undefined)
        .map((name) => name.toLowerCase())
    )

    const bodyDefaults: Array<[string, string | number]> = [
      ['Content-Type', 'application/json'],
      // Byte length, not string length: multi-byte characters count fully.
      ['Content-Length', Buffer.byteLength(data)]
    ]

    for (const [name, value] of bodyDefaults) {
      if (!suppliedNames.has(name.toLowerCase())) {
        headers[name] = value
      }
    }
  }

  requestConfig.headers = headers
}
/**
 * Converts a caller-facing request description into the pieces needed to call
 * `http.request` / `https.request`.
 *
 * - The target `url` is parsed and its query string is merged with the
 *   serialized `params`.
 * - An agent is chosen: a proxy agent when `proxyUrl` is set, otherwise a
 *   fresh `http.Agent` or `https.Agent` matching the URL protocol.
 * - An object body is serialized with `JSON.stringify`; any other body value
 *   is passed through unchanged.
 * - Headers are filled in by `parseHeaders`.
 *
 * @param rawRequestConfig - Caller-provided request description.
 * @returns The prepared request options, protocol and body.
 * @throws TypeError from the `URL` constructor if `url` cannot be parsed.
 */
function createContentConfig(
  rawRequestConfig: LoaderCrawlDataDetail & LoaderCrawlFileDetail
): ContentConfig {
  const { data: rawData, url, params, proxyUrl } = rawRequestConfig
  const { protocol, hostname, port, pathname, search } = new Url.URL(url)

  // Merge the query already present in the URL with the extra params. The
  // separator is only added when there is something to append, so an empty
  // `params` object does not leave a dangling `?` or `&` on the path.
  const extraQuery = params ? querystring.stringify(params) : ''
  let path = pathname + search
  if (extraQuery) {
    path += `${search ? '&' : '?'}${extraQuery}`
  }

  const contentConfig: ContentConfig = {
    requestConfig: {
      // When a proxy is configured its agent is used regardless of protocol.
      agent: proxyUrl
        ? HttpsProxyAgent(proxyUrl)
        : protocol === 'http:'
        ? new http.Agent()
        : new https.Agent(),

      protocol,
      hostname,
      port,
      path,

      // HTTP methods are ASCII tokens: use the locale-independent conversion
      // so the result does not depend on the host's default locale.
      method: rawRequestConfig.method?.toUpperCase() ?? 'GET',
      // Placeholder; replaced by parseHeaders below.
      headers: {},
      timeout: rawRequestConfig.timeout
    },

    protocol: protocol as 'http:' | 'https:',
    data: isObject(rawData) ? JSON.stringify(rawData) : rawData
  }

  parseHeaders(rawRequestConfig, contentConfig)

  return contentConfig
}
/**
 * Sends one HTTP or HTTPS request described by `config` and buffers the whole
 * response body in memory.
 *
 * @param config - Caller-provided request description (URL, method, headers,
 *   params, body, timeout, proxy).
 * @returns A promise resolving with the response status code, headers and the
 *   complete body as a Buffer. The promise resolves for any status code; it
 *   does not reject on 4xx/5xx responses.
 * @throws Rejects with `Error("Timeout <n>ms")` when the request times out,
 *   or with the underlying error when the request or the response stream
 *   fails. Rejects as well if the request description cannot be prepared
 *   (for example an unparsable URL).
 */
export function request(
  config: LoaderCrawlDataDetail & LoaderCrawlFileDetail
): Promise<Request> {
  return new Promise<Request>((resolve, reject) => {
    const { requestConfig, protocol, data } = createContentConfig(config)

    function handleRes(res: IncomingMessage) {
      const { statusCode, headers } = res
      const container: Buffer[] = []

      res.on('data', (chunk) => container.push(chunk))

      // A response that breaks part-way never emits 'end'; without this
      // listener the promise would stay pending forever.
      res.on('error', (err) => {
        reject(err)
      })

      res.on('end', () => {
        const resolveRes: Request = {
          statusCode,
          headers,
          data: Buffer.concat(container)
        }
        resolve(resolveRes)
      })
    }

    const req: ClientRequest =
      protocol === 'http:'
        ? http.request(requestConfig, handleRes)
        : https.request(requestConfig, handleRes)

    req.on('timeout', () => {
      const timeoutError = new Error(`Timeout ${config.timeout}ms`)
      // Reject first so the caller always sees the timeout message, then tear
      // the request down: the 'timeout' event is only a notification, and the
      // socket stays open until the request is destroyed explicitly.
      reject(timeoutError)
      req.destroy(timeoutError)
    })

    req.on('error', (err) => {
      reject(err)
    })

    // Send the body, if any. The serialized `data` is written, not the raw
    // `config.data`: an object body must go out as the JSON string that
    // Content-Length was computed from.
    if (!isUndefined(data)) {
      req.write(data)
    }

    req.end()
  })
}