-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
65 lines (57 loc) · 1.97 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!python3
"""
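Command-line entry point: iterate over topic pages, fetch each topic's
articles with get_topic_articles, compare them with compare_docs and store
the result through store_topic_to_db.

The list of visited topic titles is pickled to 'progress.pickle' after every
accepted topic. License: not stated in this file.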
"""
from compare_docs import compare_docs
from datastore import initialize_db, store_topic_to_db
from get_articles import get_topic_articles, move_to_random_topic, move_through_topic
from datetime import datetime
from os import rename
import pickle
import sys
from utils import log_setup, microsoft_char_counter
def main():
    """Walk topic pages breadth-first, compare their articles and store them.

    The list of already visited topic titles is loaded from
    ``progress.pickle``; when that file is missing, unreadable or empty the
    list falls back to ``['History']``. Crawling starts from the first entry
    of that list. After every newly accepted topic the list is written back
    to ``progress.pickle``.

    Pressing Ctrl-C while the loop is running terminates the program through
    ``sys.exit('\\nFinished.')``.
    """
    log_setup()
    #rename('wiki-bot.log', 'wiki-bot.log-old')
    initialize_db()

    default_start = ['History']
    try:
        # NOTE(review): pickle must only be used on trusted files;
        # progress.pickle is the file this program writes below.
        with open('progress.pickle', 'rb') as f:
            visited_topics = pickle.load(f)
    except Exception:
        # Best-effort resume: any failure to read the progress file means
        # "start over". ``Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        visited_topics = default_start
    if not visited_topics:
        # An empty progress list has no first topic to resume from.
        visited_topics = default_start

    # Queue of page generators; the generator at index 0 is the one consumed.
    topics_to_parse = [move_through_topic(visited_topics[0])]

    try:
        while topics_to_parse:
            start = datetime.now()

            # Breadth first search:
            try:
                page = next(topics_to_parse[0])
            except StopIteration:
                # The FRONT generator is the exhausted one, so remove the
                # front. (A plain ``pop()`` would discard the newest queued
                # generator and keep the exhausted one.)
                topics_to_parse.pop(0)
                continue

            title = page.title()

            if 'Category' in title:
                # NOTE(review): CategorizedPageGenerator is not imported in
                # the visible part of this file; reaching this line raises
                # NameError unless the name is provided elsewhere. Confirm
                # and add the missing import.
                topics_to_parse.append(
                    CategorizedPageGenerator(page, recurse=True))
                continue

            # skip what has already been visited:
            if title in visited_topics:
                continue

            # valid page, parse topic:
            topic = get_topic_articles(page)
            if not topic:
                continue

            # valid topic worth comparing: record progress before the
            # comparison so the title is persisted even if later steps fail.
            visited_topics.append(title)
            with open('progress.pickle', 'wb') as f:
                pickle.dump(visited_topics, f)

            print(topic['title'])
            memory_data_store = compare_docs(topic)
            if not memory_data_store:
                continue

            store_topic_to_db(memory_data_store)
            print("Duration to process the topic: ",
                  str(datetime.now() - start))
    except KeyboardInterrupt:
        sys.exit('\nFinished.')
# Start the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()