Commit 8d42c7a
Add support for multiple tor proxies

ayeowch committed Sep 12, 2020
1 parent f58e452

Showing 6 changed files with 38 additions and 24 deletions.

conf/crawl.conf.default: 2 additions & 1 deletion

@@ -111,7 +111,8 @@ exclude_ipv6_bogons = False
 onion = True
 
 # Tor proxy is required to connect to .onion address
-tor_proxy = 127.0.0.1:9050
+tor_proxies =
+    127.0.0.1:9050
 
 # List of initial .onion nodes
 onion_nodes =
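
The new key takes one proxy per line; ConfigParser folds the indented
continuation lines into a single multi-line value. A hypothetical example
with three local Tor SocksPorts (only 127.0.0.1:9050 is this commit's
default, the other two ports are illustrative):

# Tor proxy is required to connect to .onion address
tor_proxies =
    127.0.0.1:9050
    127.0.0.1:9052
    127.0.0.1:9054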

conf/pcap.conf.default: 5 additions & 1 deletion

@@ -22,7 +22,11 @@ rtt_count = 36
 inv_count = 1000
 
 # Tor proxy
-tor_proxy = 127.0.0.1:9050
+tor_proxies =
+    127.0.0.1:9050
 
 # Relative path to directory containing pcap files
 pcap_dir = data/pcap/f9beb4d9
+
+# Sampling rate of pcap files
+sampling_rate = 100

conf/ping.conf.default: 2 additions & 1 deletion

@@ -53,7 +53,8 @@ ttl = 10800
 onion = True
 
 # Tor proxy is required to connect to .onion address
-tor_proxy = 127.0.0.1:9050
+tor_proxies =
+    127.0.0.1:9050
 
 # Relative path to directory containing timestamp-prefixed JSON crawl files
 crawl_dir = data/crawl/f9beb4d9

crawl.py: 6 additions & 4 deletions

@@ -36,6 +36,7 @@
 import json
 import logging
 import os
+import random
 import redis
 import redis.connection
 import requests

@@ -119,7 +120,7 @@ def connect(redis_conn, key):
 
     proxy = None
     if address.endswith(".onion"):
-        proxy = CONF['tor_proxy']
+        proxy = random.choice(CONF['tor_proxies'])
 
     conn = Connection((address, int(port)),
                       (CONF['source_address'], 0),

@@ -503,10 +504,11 @@ def init_conf(argv):
                                                   'exclude_ipv6_bogons')
 
     CONF['onion'] = conf.getboolean('crawl', 'onion')
-    CONF['tor_proxy'] = None
+    CONF['tor_proxies'] = []
     if CONF['onion']:
-        tor_proxy = conf.get('crawl', 'tor_proxy').split(":")
-        CONF['tor_proxy'] = (tor_proxy[0], int(tor_proxy[1]))
+        tor_proxies = conf.get('crawl', 'tor_proxies').strip().split("\n")
+        CONF['tor_proxies'] = [
+            (p.split(":")[0], int(p.split(":")[1])) for p in tor_proxies]
     CONF['onion_nodes'] = conf.get('crawl', 'onion_nodes').strip().split("\n")
 
     CONF['include_checked'] = conf.getboolean('crawl', 'include_checked')
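
The parsing and selection above are easy to exercise in isolation. A minimal
standalone sketch (not the crawler itself) using the same expressions as
init_conf() and connect():

import random

# A multi-line value as ConfigParser would return it for tor_proxies.
raw = "\n127.0.0.1:9050\n127.0.0.1:9052"

# Same parse as init_conf(): one host:port entry per line.
tor_proxies = [
    (p.split(":")[0], int(p.split(":")[1]))
    for p in raw.strip().split("\n")]
assert tor_proxies == [("127.0.0.1", 9050), ("127.0.0.1", 9052)]

# Same selection as connect(): each .onion connection draws one proxy
# uniformly at random, spreading connections across all Tor instances.
proxy = random.choice(tor_proxies)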

pcap.py: 18 additions & 13 deletions

@@ -34,6 +34,7 @@
 import hashlib
 import logging
 import os
+import random
 import socket
 import sys
 import time

@@ -122,7 +123,7 @@ def cache_messages(self):
             dst = (stream_id[2], stream_id[3])
             node = src
             is_tor = False
-            if src == CONF['tor_proxy']:
+            if src in CONF['tor_proxies']:
                 # dst port will be used to restore .onion node.
                 node = dst
                 is_tor = True

@@ -254,16 +255,17 @@ def cron():
                 logging.debug(err)
                 continue
 
-            logging.debug("Loading: %s", dump)
-
-            start = time.time()
-            cache = Cache(filepath=dump)
-            cache.cache_messages()
-            end = time.time()
-            elapsed = end - start
-
-            logging.info("Dump: %s (%d messages)", dump, cache.count)
-            logging.debug("Elapsed: %d", elapsed)
+            if 0 in random.sample(range(0, 100), CONF['sampling_rate']):
+                logging.debug("Loading: %s", dump)
+                start = time.time()
+                cache = Cache(filepath=dump)
+                cache.cache_messages()
+                end = time.time()
+                elapsed = end - start
+                logging.info("Dump: %s (%d messages)", dump, cache.count)
+                logging.debug("Elapsed: %d", elapsed)
+            else:
+                logging.debug("Dropped: %s", tmp)
 
             os.remove(dump)
 

@@ -282,13 +284,16 @@ def init_conf(argv):
     CONF['rtt_count'] = conf.getint('pcap', 'rtt_count')
     CONF['inv_count'] = conf.getint('pcap', 'inv_count')
 
-    tor_proxy = conf.get('pcap', 'tor_proxy').split(":")
-    CONF['tor_proxy'] = (tor_proxy[0], int(tor_proxy[1]))
+    tor_proxies = conf.get('pcap', 'tor_proxies').strip().split("\n")
+    CONF['tor_proxies'] = [
+        (p.split(":")[0], int(p.split(":")[1])) for p in tor_proxies]
 
     CONF['pcap_dir'] = conf.get('pcap', 'pcap_dir')
     if not os.path.exists(CONF['pcap_dir']):
         os.makedirs(CONF['pcap_dir'])
 
+    CONF['sampling_rate'] = conf.getint('pcap', 'sampling_rate')
+
 
 def main(argv):
     if len(argv) < 2 or not os.path.exists(argv[1]):
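
The sampling gate in cron() reads a little indirectly: random.sample(range(0,
100), k) draws k distinct values from 0..99, so 0 appears in the draw with
probability k/100. sampling_rate = 100 therefore loads every dump, while a
value of 10 would load roughly one in ten and drop the rest. A standalone
check of that probability (not part of the commit):

import random

def keep(sampling_rate):
    # Same test as cron(): 0 lands in a sampling_rate-of-100 draw
    # with probability sampling_rate / 100.
    return 0 in random.sample(range(0, 100), sampling_rate)

trials = 100000
hits = sum(keep(10) for _ in range(trials))
print(hits / trials)  # close to 0.10 for sampling_rate = 10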

ping.py: 5 additions & 4 deletions

@@ -212,7 +212,7 @@ def task():
 
     proxy = None
     if address.endswith(".onion"):
-        proxy = CONF['tor_proxy']
+        proxy = random.choice(CONF['tor_proxies'])
 
     handshake_msgs = []
     conn = Connection(node,

@@ -398,10 +398,11 @@ def init_conf(argv):
                                            'nodes_per_ipv6_prefix')
 
     CONF['onion'] = conf.getboolean('ping', 'onion')
-    CONF['tor_proxy'] = None
+    CONF['tor_proxies'] = []
    if CONF['onion']:
-        tor_proxy = conf.get('ping', 'tor_proxy').split(":")
-        CONF['tor_proxy'] = (tor_proxy[0], int(tor_proxy[1]))
+        tor_proxies = conf.get('ping', 'tor_proxies').strip().split("\n")
+        CONF['tor_proxies'] = [
+            (p.split(":")[0], int(p.split(":")[1])) for p in tor_proxies]
 
     CONF['crawl_dir'] = conf.get('ping', 'crawl_dir')
     if not os.path.exists(CONF['crawl_dir']):
