-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapper.py
104 lines (85 loc) · 2.72 KB
/
scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import datetime
import logging
import os
import shutil
import subprocess
from datetime import datetime
#multiprocess request
from multiprocessing import Pool

import pyfiglet
#nice figlet when running on terminal
from pyfiglet import Figlet
# onions.txt contains the hidden services addresses, one per line
try:
    with open("onions.txt", "r") as onion:
        content = onion.read().splitlines()
except FileNotFoundError:
    # A missing input file must not abort start-up: menu option 1 opens
    # onions.txt in an editor, so begin with an empty list instead.
    content = []
def editor():
    """
    Open onions.txt in the nano terminal text editor so links can be added

    The call goes through the system shell and blocks until the editor
    command returns. Nothing is returned.
    """
    os.system("nano onions.txt")
def scraper_execution(url):
    """
    Run the scraping helper script on one hidden service
    Launches ``python3 ./src/helper.py <url>`` as a child process and waits
    for it to finish. Failures are logged, never raised.
    Note that you will probably never call that function directly

    Arguments
    --------------------------------------------------
    url : Url of hidden services
    """
    url = url.strip()
    if not url:
        # Blank lines in the input file would otherwise start the helper
        # without any target.
        logging.warning("Skipping empty URL")
        return

    # Argument list instead of a shell string: the URL comes from a text
    # file and must never be interpreted by a shell.
    command = ["python3", "./src/helper.py", url]
    print(" ".join(command))
    try:
        completed = subprocess.run(command, check=False)
    except OSError:
        # The interpreter itself could not be started.
        logging.exception("Scraping failed for %s", url)
        return
    if completed.returncode != 0:
        logging.error(
            "Scraping failed for %s (exit status %d)", url, completed.returncode
        )
#multiprocessing wrapper for execution
def multiprocessing(task, processes=10):
    """
    Multiprocessing wrapper
    Clear former output directory and create a new one
    Call ``task`` once for every entry of the module-level ``content`` list,
    spreading the calls over a pool of worker processes
    Note that this is not multithreading

    Arguments
    --------------------------------------------------
    task : Callable taking one URL (in practice scraper_execution); it is
           handed to worker processes, so it should be a top-level function
    processes : Number of URLs that will be processed at the same time
    """
    #output directory: always start from an empty one
    output_dir = "output"
    if os.path.islink(output_dir) or os.path.isfile(output_dir):
        os.remove(output_dir)
    elif os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    with Pool(processes) as pool:
        # map() distributes the URLs over the workers and waits for all of
        # them; apply() would block on each URL in turn and run them
        # serially. chunksize=1 hands out one URL at a time so a slow
        # service does not hold back URLs queued behind it.
        pool.map(task, content, chunksize=1)
#Program banner
def banner():
    """
    Print the 'TorScraper' title rendered with the 'slant' figlet font,
    followed by a newline string
    """
    title_font = Figlet(font='slant')
    rendered_title = title_font.renderText('TorScraper')
    print(rendered_title)
    print("\n")
#Program menu
def menu():
    """
    Print the list of numbered options the user can choose from
    """
    menu_lines = (
        "Please select one of the following options:- \n",
        " 1. Add links to onions.txt input file",
        " 2. Scrap hidden services URLs present in onions.txt",
        " 3. Exit program.\n",
    )
    for entry in menu_lines:
        print(entry)
# Interactive entry point: show the banner and menu until the user exits
if __name__ == '__main__':
    try:
        while True:
            os.system("clear")
            banner()
            menu()
            try:
                choice = int(input("Choose one option: "))
            except ValueError:
                # Non-numeric input: show the menu again instead of crashing
                continue
            print("\n")
            if choice == 1:
                editor()
                # Re-read the file so links added in the editor are used by
                # the next scrape and counted in the summary.
                try:
                    with open("onions.txt", "r") as onion:
                        content = onion.read().splitlines()
                except FileNotFoundError:
                    content = []
            elif choice == 2:
                begin_time = datetime.now()
                multiprocessing(scraper_execution)
                print("Execution time for {} hidden service".format(len(content)))
                print(datetime.now() - begin_time)
            else:
                # Any other number leaves the loop; the script then ends
                break
    except KeyboardInterrupt:
        print("\n\nInterrupt received! Exiting cleanly...\n")