Commit f85f93a

✨ feat: support parsing the bili MC mirror wiki
ZombieFly committed Aug 5, 2022
1 parent bfd0129 commit f85f93a
Showing 3 changed files with 43 additions and 44 deletions.
2 changes: 1 addition & 1 deletion __init__.py
@@ -79,7 +79,7 @@ async def _search(
             # the given number is too large
             await search.send(f'{numb} is out of the index range')
             raise FinishedException
-
+            #################
         else:
             # * first-time handling after the session opens
             try:
83 changes: 41 additions & 42 deletions mediawiki/wikipedia.py
@@ -267,8 +267,7 @@ async def summary(
         sentences=0,
         chars=0,
         auto_suggest=True,
-        redirect=True,
-        engine='wikiAPI'
+        redirect=True
 ) -> list[int, str]:  # type: ignore
     '''
     Page id and plain-text summary
@@ -280,56 +279,56 @@
     * redirect - allow redirection without raising RedirectError
     '''
 
-    class Engine:
-
-        async def wikiAPI(self,
-                          title,
-                          sentences=0,
-                          chars=0,
-                          auto_suggest=True,
-                          redirect=True
-                          ) -> list[int, str]:  # type: ignore
-            # fetch the summary via the MediaWiki API
-            # use auto_suggest and redirect to get the correct article
-            # also, use page's error checking to raise DisambiguationError if necessary
-            page_info = await page(title, auto_suggest=auto_suggest, redirect=redirect)
-            title = page_info.title
-            pageid: int = page_info.pageid
-
-            query_params = {
-                'prop': 'extracts',
-                'explaintext': '',
-                'titles': title,
-            }
-
-            if sentences:
-                query_params['exsentences'] = sentences
-            elif chars:
-                query_params['exchars'] = chars
-            else:
-                query_params['exintro'] = ''
-            request = await _wiki_request(query_params)
-            try:
-                summary = request['query']['pages'][pageid]['extract']
-            except KeyError:
-                raise NoExtractError()
-
-            return [pageid, summary.strip()]
-
-    pageid, _summary = await getattr(Engine, engine)(Engine,
-                                                     title,
-                                                     sentences,
-                                                     chars,
-                                                     auto_suggest,
-                                                     redirect)
-
-    # return [url, summary]
-    return [pageid, _summary]
+    # fetch the summary via the MediaWiki API
+    # use auto_suggest and redirect to get the correct article
+    # also, use page's error checking to raise DisambiguationError if necessary
+    page_info: WikipediaPage = await page(title, auto_suggest=auto_suggest, redirect=redirect)
+    title = page_info.title
+    pageid: int = page_info.pageid
+
+    query_params = {
+        'prop': 'extracts',
+        'explaintext': '',
+        'titles': title,
+    }
+
+    if sentences:
+        query_params['exsentences'] = sentences
+    elif chars:
+        query_params['exchars'] = chars
+    else:
+        query_params['exintro'] = ''
+    request = await _wiki_request(query_params)
+    try:
+        summary = (request['query']['pages'][pageid]['extract']).strip()
+    except KeyError:
+
+        try:
+            # parse the bili wiki mirror page instead
+            global USER_AGENT
+
+            headers = {
+                'User-Agent': USER_AGENT
+            }
+
+            async with httpx.AsyncClient(proxies=PROXIES, timeout=None) as client:
+                r = await client.get(page_info.url, headers=headers)
+
+            r = r.text
+
+            # url = request['query']['pages'][pageid]['fullurl']
+            summary = re.sub(r"</?(.+?)>", "", ''.join(re.compile(
+                r'<p><b>(.*?)\n<p>\n<div').findall(r)))
+            if summary == '':
+                summary = re.sub(r"</?(.+?)>", "", ''.join(re.compile(
+                    r'<p><b>(.*?)\n</p>').findall(r)))
+        except Exception:
+            raise NoExtractError()
+
+    return [pageid, summary]  # type: ignore
 
 
-async def page(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
+async def page(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False) -> 'WikipediaPage':
     '''
     Get a WikipediaPage object for the page with title `title` or the pageid
     `pageid` (mutually exclusive).
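The heart of this commit is the fallback path: when the extracts API yields nothing, the new code fetches the rendered mirror page over httpx and pulls the lead paragraph out of the raw HTML with two regexes. Below is a minimal, offline sketch of just that extraction step, reusing the regexes from the diff; the sample HTML is an assumption about what a bili mirror article roughly renders as, not captured output.

import re

# Assumed markup for a bili (BWIKI) mirror article body; real pages may differ.
sample_html = (
    '<p><b>Creeper</b> is a common hostile mob.</p>\n'
    '<p>\n'
    '<div class="toc">...</div>'
)

def extract_intro(html: str) -> str:
    # Primary pattern from the diff: a bold lead paragraph followed by
    # an empty <p> and a <div> (e.g. the table of contents).
    intro = ''.join(re.compile(r'<p><b>(.*?)\n<p>\n<div').findall(html))
    if intro == '':
        # Fallback pattern from the diff: a bold lead paragraph closed with </p>.
        intro = ''.join(re.compile(r'<p><b>(.*?)\n</p>').findall(html))
    # Strip whatever inline tags remain (</b>, </p>, <a href=...>, ...).
    return re.sub(r"</?(.+?)>", "", intro)

print(extract_intro(sample_html))  # Creeper is a common hostile mob.

The second pattern presumably covers mirror pages whose lead paragraph is closed with </p> directly instead of running straight into a following <div>; trying the stricter pattern first keeps the match anchored to the intro.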
2 changes: 1 addition & 1 deletion utils.py
@@ -69,7 +69,7 @@ async def output(
         )
 
     except NoExtractError:
-        return reply_out(msg_id, 'the target wiki does not support extract')
+        return reply_out(msg_id, 'the target wiki cannot generate a summary')
     except wiki.ApiReturnError:
         return reply_out(msg_id, 'the API returned errors repeatedly; check the API status or try again later')
 
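For orientation, this reworded reply is what users see when summary() exhausts both the extracts API and the bili mirror fallback and raises NoExtractError. A rough sketch of the call shape, assuming hypothetical import paths (only summary and NoExtractError are named in this commit; the module layout here is a guess):

# Hypothetical import paths, for illustration only.
from mediawiki.wikipedia import summary
from mediawiki.exceptions import NoExtractError

async def describe(title: str) -> str:
    try:
        pageid, text = await summary(title, sentences=2)
        return f'[{pageid}] {text}'
    except NoExtractError:
        # Neither the extracts API nor the mirror-page regexes produced intro text.
        return 'the target wiki cannot generate a summary'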
