-
Notifications
You must be signed in to change notification settings - Fork 2
/
fb-site.py
executable file
·97 lines (77 loc) · 2.29 KB
/
fb-site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
import argparse
import pugsql
import time
import multiprocessing
from fbscraper.settings import LOG_LEVEL, LOG_FORMAT, LOG_DATEFMT, LOG_FILENAME
import logging
# Root logging configuration: every record goes both to a stream handler
# (stderr by default) and to the UTF-8 encoded file LOG_FILENAME.
# NOTE(review): this runs before the fbscraper imports further down,
# presumably so that anything they log at import time is already
# formatted and captured -- confirm before reordering the imports.
_stream_handler = logging.StreamHandler()
_file_handler = logging.FileHandler(LOG_FILENAME, encoding="utf-8")
logging.basicConfig(
    level=LOG_LEVEL,
    format=LOG_FORMAT,
    datefmt=LOG_DATEFMT,
    handlers=[_stream_handler, _file_handler],
)
logger = logging.getLogger(__name__)
# project-local modules (imported after logging has been configured)
import fbscraper.driver.site
import fbscraper.facebook as fb
from fbscraper.settings import (
SITE_DEFAULT_LIMIT_SEC,
POST_DEFAULT_LIMIT_SEC,
DB_URL,
DEFAULT_BROWSER_TYPE,
DEFAULT_EXECUTABLE_PATH,
)
# Module-level database handle shared by the sub commands below: build a
# PugSQL module from the "queries" path and connect it to DB_URL.
# NOTE(review): "queries" is a relative path, so this presumably expects the
# script to be started from the directory that contains it -- confirm.
db = pugsql.module("queries")
db.connect(DB_URL)
def update(args):
    """Run the site update in a child process, bounded by a time limit.

    A browser driver is created in this process and handed, together with
    the shared ``db`` module, to ``fbscraper.driver.site.update`` running in
    a ``multiprocessing.Process``.

    Args:
        args: parsed CLI namespace providing ``id`` (site id),
            ``article_limit_sec`` (forwarded to the worker) and
            ``limit_sec`` (maximum number of seconds to wait for the worker).
    """
    browser = fb.create_driver_without_session()
    worker = multiprocessing.Process(
        target=fbscraper.driver.site.update,
        args=(browser, db, args.id, args.article_limit_sec),
    )
    try:
        worker.start()
        # Wait at most limit_sec, but return as soon as the worker is done
        # instead of always sleeping for the whole limit.
        worker.join(args.limit_sec)
    finally:
        # Also reached when the wait is interrupted (e.g. Ctrl-C), so the
        # worker is never left running behind the parent's back.
        if worker.is_alive():
            logger.warning(
                "update of site %s still running after %s seconds, terminating",
                args.id,
                args.limit_sec,
            )
            worker.terminate()
            worker.join()
        # The driver was created in this process, so it is released here,
        # mirroring discover(). Best effort: a failure to quit must not
        # mask the outcome of the update itself.
        if browser:
            try:
                browser.quit()
            except Exception:
                logger.exception("failed to quit browser after update")
def discover(args):
    """Run discovery for one site and always release the browser.

    The site record is fetched with ``db.get_site_by_id`` before the browser
    is created, then both are passed to ``fbscraper.driver.site.discover``.

    Args:
        args: parsed CLI namespace providing ``id`` (site id) and
            ``limit_sec`` (forwarded to the discover driver).
    """
    site = db.get_site_by_id(site_id=args.id)
    browser = fb.create_driver_without_session()
    try:
        fbscraper.driver.site.discover(browser, db, site, args.limit_sec)
    finally:
        # Quit even when discovery raises, so the driver is not leaked.
        if browser:
            browser.quit()
def main(args):
    """Dispatch to the handler selected by ``args.command``.

    ``"discover"`` runs ``discover(args)`` and ``"update"`` runs
    ``update(args)``; any other value is ignored and nothing is run.
    """
    command = args.command
    if command == "discover":
        discover(args)
        return
    if command == "update":
        update(args)
        return
if __name__ == "__main__":
    # Command line: one required sub command, either "discover" or "update".
    parser = argparse.ArgumentParser()
    cmds = parser.add_subparsers(title="sub command", dest="command", required=True)

    discover_cmd = cmds.add_parser("discover", help="do discover")
    update_cmd = cmds.add_parser("update", help="do update")

    # Both sub commands take the same site id and overall time limit.
    for sub_cmd in (discover_cmd, update_cmd):
        sub_cmd.add_argument("id", type=int, help="id of the site to work on")
        sub_cmd.add_argument(
            "--limit-sec",
            type=int,
            default=3000,
            help="process run time limit in seconds",
        )

    # Only "update" has a per-post limit.
    update_cmd.add_argument(
        "--article-limit-sec",
        type=int,
        default=POST_DEFAULT_LIMIT_SEC,
        help="max load time in seconds for a post",
    )

    args = parser.parse_args()
    main(args)