-
-
Notifications
You must be signed in to change notification settings - Fork 288
/
novelsonline.py
78 lines (64 loc) · 2.42 KB
/
novelsonline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# -*- coding: utf-8 -*-
import logging
from typing import Generator, Union
from bs4 import BeautifulSoup, Tag
from lncrawl.models import Chapter, Volume
from lncrawl.templates.browser.general import GeneralBrowserTemplate
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class NovelsOnline(GeneralBrowserTemplate):
    """Crawler for https://novelsonline.net/.

    Supplies the site-specific CSS selectors used to read a novel's title,
    cover, authors, chapter list and chapter body, plus the browser hooks
    that open the novel and chapter pages.
    """

    base_url = ["https://novelsonline.net/"]
    has_manga = False
    has_mtl = False

    def initialize(self) -> None:
        """Add this site's entries to the cleaner's ``bad_tags`` / ``bad_css``.

        Per the crawler template this hook is optional and is called before
        all other methods.
        """
        # NOTE(review): presumably the cleaner strips these tags/selectors
        # from the chapter body -- confirm against the cleaner implementation.
        self.cleaner.bad_tags.update(["div"])
        self.cleaner.bad_css.update(
            [
                ".trinity-player-iframe-wrapper",
                ".hidden",
                ".ads-title",
                "script",
                "center",
                "interaction",
                "a[href*=remove-ads]",
                "a[target=_blank]",
                "hr",
                "br",
                "#growfoodsmart",
                ".col-md-6",
                ".trv_player_container",
                ".ad1",
            ]
        )

    def visit_novel_page_in_browser(self) -> BeautifulSoup:
        """Open ``self.novel_url`` in the browser and wait on the content container."""
        # NOTE(review): the annotation says BeautifulSoup but nothing is
        # returned; it is kept unchanged here because the base-class signature
        # is not visible from this file -- confirm and align with
        # visit_chapter_page_in_browser (-> None) if the template allows.
        self.visit(self.novel_url)
        self.browser.wait(".container--content")

    def parse_title(self, soup: BeautifulSoup) -> str:
        """Return the stripped text of the ``.block-title h1`` element.

        Raises:
            AssertionError: if the page has no ``.block-title h1`` element.
        """
        heading = soup.select_one(".block-title h1")
        assert heading, "Novel title element '.block-title h1' not found"
        return heading.text.strip()

    def parse_cover(self, soup: BeautifulSoup) -> str:
        """Return the absolute URL of the novel's cover image.

        The cover is the first ``<img>`` whose ``alt`` attribute equals
        ``self.novel_title``. Its ``data-src`` attribute is preferred over
        ``src``.

        Raises:
            AssertionError: if no such image exists, or if it has neither a
                non-empty ``data-src`` nor a non-empty ``src``.
        """
        image = soup.find("img", {"alt": self.novel_title})
        assert image, "Cover image matching the novel title not found"
        # An empty data-src falls through to src instead of being resolved
        # into a meaningless URL.
        source = image.get("data-src") or image.get("src")
        # Fail the same way as a missing tag rather than implicitly returning
        # None from a method declared to return str.
        assert source, "Cover image has neither a data-src nor a src attribute"
        return self.absolute_url(source)

    def parse_authors(self, soup: BeautifulSoup) -> Generator[str, None, None]:
        """Yield the stripped text of every link whose ``href`` contains ``author``."""
        for link in soup.select("a[href*=author]"):
            yield link.text.strip()

    def parse_chapter_list(
        self, soup: BeautifulSoup
    ) -> Generator[Union[Chapter, Volume], None, None]:
        """Yield one ``Chapter`` per link under ``.chapters .chapter-chs li``.

        Chapters are numbered from 1 in document order. No ``Volume`` is
        yielded by this implementation.
        """
        links = soup.select(".chapters .chapter-chs li a")
        for number, link in enumerate(links, start=1):
            yield Chapter(
                id=number,
                url=self.absolute_url(link["href"]),
                title=link.text.strip(),
            )

    def visit_chapter_page_in_browser(self, chapter: Chapter) -> None:
        """Open ``chapter.url`` in the browser and wait on the content container."""
        self.visit(chapter.url)
        self.browser.wait(".container--content")

    def select_chapter_body(self, soup: BeautifulSoup) -> Tag:
        """Return the ``#contentall`` element holding the chapter text.

        ``select_one`` yields ``None`` when the element is absent; that value
        is passed through to the caller unchanged.
        """
        return soup.select_one("#contentall")