The methodology is based on Section 6.2 of the paper "Quantifying measurement quality and load distribution in Tor": https://dl.acm.org/doi/pdf/10.1145/3427228.3427238

* I can always create a new container image with a Tor process representing each client and it will always choose a new guard node, so I don't have to change the default Tor definitions
* Log all relays along with their data, specifically geographical location, used in each client circuit.
* In the paper, the authors logged over 8.6 million circuits, representing about 275,000 circuits per day
* Drop the circuit immediately after creation to ensure not overloading the guard relays
* Wait 16 minutes between each circuit

In [1]:
!pip install stem



In [2]:
import subprocess
import requests
import shlex
import traceback
import time
import threading
from stem import CircStatus
from stem.control import Controller
import stem.process


In [3]:
# Filesystem locations. ONION_SERVICE_FOLDER keeps its trailing slash because
# get_onion_url() builds the hostname path by plain string concatenation.
BASE_DIR = 'data'
ONION_SERVICE_FOLDER = 'onion_service/'

# Local ports: Tor SOCKS listener, Tor control port, and the local web port
# that the onion service's virtual port 80 is mapped to.
SOCKS_PORT = 9050
CONTROL_PORT = 9051
WEB_PORT = 8080

# Output files, one per relay position, written with joblib.dump().
GUARD_NODES_FILE = 'guard_nodes_onion.joblib'
MIDDLE_NODES_FILE = 'middle_nodes_onion.joblib'
EXIT_NODES_FILE = 'exit_nodes_onion.joblib'

### More stem references
* https://tor.stackexchange.com/questions/7049/stem-how-to-get-current-in-use-circuit
* https://github.com/webfp/tor-browser-selenium/blob/main/examples/stem_adv.py
* https://stem.torproject.org/tutorials/to_russia_with_love.html#custom-path-selection
* https://stem.torproject.org/api.html
* https://stem.torproject.org/api/descriptor/hidden_service.html
* Measuring circuit-establishment times (including circuits to introduction points) with stem; this project is only two years old and supports v3 onion services: https://github.com/mobilesec/onion-service-time-measurement/tree/main


In [6]:
def get_geolocation(ip_address, timeout=10):
    """Look up and print geolocation metadata for an IP address via ipinfo.io.

    Args:
        ip_address: Address to look up; it is interpolated into the request
            URL as-is.
        timeout: Seconds to wait for ipinfo.io before ``requests`` gives up
            and raises. Defaults to 10.

    Returns:
        The decoded JSON body of the response. It is expected to be a dict,
        since ``.get`` is called on it for the city, region, country and org
        fields.
    """
    url = f"https://ipinfo.io/{ip_address}/json"
    # requests has no default timeout: without one, a single stalled lookup
    # would block the whole measurement loop indefinitely.
    response = requests.get(url, timeout=timeout)
    data = response.json()

    print(f"IP Address: {ip_address}")
    print(f"Location: {data.get('city', '')}, {data.get('region', '')}, {data.get('country', '')}")
    print(f"ISP: {data.get('org', '')}")
    print("=" * 30)

    return data

In [7]:
def get_circuit_data(controller, circuit):
    """Return the IP addresses of the first three relays of a built circuit.

    Args:
        controller: Object exposing ``get_network_status(fingerprint)``; the
            returned status must provide ``flags`` and ``address``.
        circuit: Object exposing ``status`` and ``path``, where each path
            entry is indexable and its first element is the relay fingerprint.

    Returns:
        A ``(guard_ip, middle_ip, exit_ip)`` tuple. All three are ``None``
        when the circuit is not in the BUILT state or has fewer than three
        hops.
    """
    print("Circuit characteristics:", circuit)

    usable = circuit.status == CircStatus.BUILT and len(circuit.path) >= 3
    if not usable:
        print("Circuit skipped")
        return None, None, None

    # Hop 0 is the guard relay, hop 1 the middle relay, hop 2 the last relay.
    fingerprints = []
    for label, position in (("GUARD", 0), ("MIDDLE", 1), ("LAST", 2)):
        fingerprints.append(circuit.path[position][0])
        print(label, circuit.path[position])

    # Get relay details for each hop.
    relays = [controller.get_network_status(fp) for fp in fingerprints]

    flag_labels = ("Guard relay flags", "Middle relay flags", "Exit relay flags")
    for flag_label, relay in zip(flag_labels, relays):
        print(flag_label, relay.flags)

    guard_relay, middle_relay, exit_relay = relays
    return guard_relay.address, middle_relay.address, exit_relay.address

In [8]:
def change_permissions_onion_service_folder():
    """Restrict the onion-service directory to its owner (mode 700).

    Runs ``chmod 700`` on ``ONION_SERVICE_FOLDER`` and blocks until the
    command has finished.
    NOTE(review): presumably needed because Tor refuses a HiddenServiceDir
    with looser permissions -- confirm.

    Returns:
        The ``subprocess.Popen`` object of the completed ``chmod`` command;
        its ``returncode`` is populated.
    """
    command = f"chmod 700 {ONION_SERVICE_FOLDER}"
    chmod_process = subprocess.Popen(shlex.split(command))
    # Popen returns immediately; wait so the new mode is in place before the
    # caller goes on to launch Tor against this directory.
    chmod_process.wait()
    return chmod_process

In [9]:
def log_callback(line):
    """Echo one log line to standard output.

    Passed as ``init_msg_handler`` when launching Tor, so each line handed to
    it is printed as it arrives.
    """
    print(line)

### How to create circuits with onion services
* https://gist.github.com/PaulSec/ec8f1689bfde3ce9a920

In [11]:
def create_dummy_onion_service():
    """Launch a Tor process that hosts a dummy onion service.

    The process opens the SOCKS and control listeners on ``SOCKS_PORT`` and
    ``CONTROL_PORT``, and publishes an onion service from
    ``ONION_SERVICE_FOLDER`` whose virtual port 80 forwards to
    ``127.0.0.1:WEB_PORT``. The call is made with ``completion_percent=100``
    and every startup log line is passed to ``log_callback``.

    Returns:
        Whatever ``stem.process.launch_tor_with_config`` returns (the handle
        of the launched Tor process).
    """
    return stem.process.launch_tor_with_config(
        config={
            'SocksPort': str(SOCKS_PORT),
            'ControlPort': str(CONTROL_PORT),
            # HiddenServiceDir must precede HiddenServicePort in the torrc;
            # dict insertion order keeps it that way.
            'HiddenServiceDir': ONION_SERVICE_FOLDER,
            'HiddenServicePort': f'80 127.0.0.1:{WEB_PORT}',
            # Intended so that Tor keeps using new guards.
            # NOTE(review): per the Tor manual, NumEntryGuards 0 appears to
            # mean "take the number from the consensus", not "no guards", and
            # the comment in change_guard_node() expects "UseEntryGuards 0".
            # Confirm which option is actually wanted.
            'NumEntryGuards': '0',
            'Log': 'NOTICE stdout',  # Redirect log messages to stdout
        },
        completion_percent=100,
        init_msg_handler=log_callback,
    )

In [12]:
def get_onion_url():
    """Return the first line of the onion service's ``hostname`` file.

    The file is read from ``ONION_SERVICE_FOLDER``; everything from the first
    newline onwards is dropped.
    """
    hostname_path = ONION_SERVICE_FOLDER+'hostname'
    with open(hostname_path) as hostname_file:
        first_line = hostname_file.readline()
    return first_line.partition('\n')[0]

In [None]:
# Per-position relay tables, filled in by create_new_circuit_and_close() and
# dumped to disk after every iteration. Three distinct dicts, one per role.
guard_nodes, middle_nodes, exit_nodes = {}, {}, {}

In [None]:
def _record_relay_location(nodes, ip_address):
    """Geolocate *ip_address* and store its country and ISP in *nodes*."""
    data = get_geolocation(ip_address)
    nodes[ip_address] = {
        'country': data.get('country', ''),
        'isp': data.get('org', ''),
    }


# NOTE(review): `retry` is not imported in the visible cells; the keyword
# names match the `retrying` package (`from retrying import retry`) -- confirm.
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=10)
def create_new_circuit_and_close(controller, iteration):
    """Build one circuit, record where its three relays are, and close it.

    The guard, middle and exit relay IPs are geolocated and stored in the
    module-level ``guard_nodes``, ``middle_nodes`` and ``exit_nodes`` dicts,
    keyed by relay IP, each with a ``{'country': ..., 'isp': ...}`` value.

    Args:
        controller: Authenticated Tor controller used to build, inspect and
            close the circuit.
        iteration: Iteration number; used only in the progress message.

    Raises:
        RuntimeError: If the new circuit is not a built three-hop circuit.
            Like any other exception raised here, this triggers another
            attempt by the ``retry`` decorator.
    """
    print(f"RETRY create_new_circuit({iteration})")

    circuit_id = controller.new_circuit(await_build=True, timeout=60)
    try:
        # Look the circuit up by its id: the last entry of get_circuits() is
        # not guaranteed to be the one that was just built.
        circuit = controller.get_circuit(circuit_id)
        guard_ip, middle_ip, exit_ip = get_circuit_data(controller, circuit)
    finally:
        # Drop the circuit as soon as its path has been read, including when
        # reading it failed, so that retries do not leave circuits open.
        controller.close_circuit(circuit_id)

    if None in (guard_ip, middle_ip, exit_ip):
        raise RuntimeError(f"Circuit {circuit_id} is not a built three-hop circuit")

    _record_relay_location(guard_nodes, guard_ip)
    _record_relay_location(middle_nodes, middle_ip)
    _record_relay_location(exit_nodes, exit_ip)

In [None]:
def change_guard_node(controller):
    """Send the NEWNYM signal through *controller* and report it on stdout.

    Args:
        controller: Object exposing ``signal(name)``.
    """
    # We need to have the line "UseEntryGuards 0" in the torrc for this to work
    new_identity_signal = "NEWNYM"
    controller.signal(new_identity_signal)
    print("Requested a new identity (including new guard nodes)")

In [13]:

def connect_to_controller(iteration):
    """Run one measurement iteration over a fresh control-port connection.

    Connects to the Tor control port on ``127.0.0.1:CONTROL_PORT``,
    authenticates, builds/records/closes one circuit via
    ``create_new_circuit_and_close`` and finally requests a new identity.
    A failure of the circuit step is reported and followed by a 10 second
    pause; it does not abort the iteration.

    Args:
        iteration: Iteration number, used in progress messages.
    """
    with Controller.from_port(address="127.0.0.1", port=CONTROL_PORT) as controller:
        controller.authenticate()
        print("Successfully authenticated with Tor control port")

        print("Created {} preemptive circuits".format(len(controller.get_circuits())))
        print("Tor will chose a random one from these to start the next session")

        print("=== Iteration {}".format(iteration))
        try:
            create_new_circuit_and_close(controller, iteration)
        except Exception:
            print(f"create_new_circuit_and_close() failed for iteration {iteration} after 10 attempts. Continuing ...")
            # Show why it failed instead of silently discarding the error.
            traceback.print_exc()
            time.sleep(10)
        change_guard_node(controller)
    

### Check if Tor is already running
First, restart the kernel. Then, run the following commands, replacing `95441` with the PID reported by `lsof`:
```
sudo lsof -i -P | grep LISTEN | grep 9050
sudo kill -9 95441
```

In [14]:
#stop_tor()
# Start Tor process in the background
#tor_process = start_tor()
# Driver cell: launch Tor with the dummy onion service, run the measurement
# loop, and always terminate the Tor process on the way out.
# NOTE(review): `tqdm` and `joblib` are used below but are not imported in the
# visible cells (presumably `from tqdm import tqdm` and `import joblib`) --
# confirm they are imported elsewhere in the notebook.
change_permissions_onion_service_folder()
onion_process = None
try:
    onion_process = create_dummy_onion_service()
    print("onion_process", onion_process)
    print("Tor process started. Waiting for a moment...")
    time.sleep(10)

    # The loop lives in the same try block as the launch: if Tor failed to
    # start there is nothing to measure, and an interrupt during startup
    # still reaches the cleanup below.
    iterations = 10000
    for iteration in tqdm(range(iterations)):
        connect_to_controller(iteration)
        # So that it updates at every iteration
        joblib.dump(guard_nodes, GUARD_NODES_FILE)
        joblib.dump(middle_nodes, MIDDLE_NODES_FILE)
        joblib.dump(exit_nodes, EXIT_NODES_FILE)

except Exception:
    traceback.print_exc()

finally:
    if onion_process:
        onion_process.terminate()
        print("Exited Tor process cleanly at the end")


    

Aug 15 12:39:32.804 [notice] Tor 0.4.7.14 running on Darwin with Libevent 2.1.12-stable, OpenSSL 3.1.2, Zlib 1.2.11, Liblzma N/A, Libzstd N/A and Unknown N/A as libc.
Aug 15 12:39:32.804 [notice] Tor can't help you if you use it wrong! Learn how to be safe at https://support.torproject.org/faq/staying-anonymous/
Aug 15 12:39:32.805 [warn] Path for HiddenServiceDir (onion_service/) is relative and will resolve to /Users/danielalopes/coverage_analysis_ndss_2024/onion_service/. Is this what you wanted?
Aug 15 12:39:32.806 [notice] Opening Socks listener on 127.0.0.1:9050
Aug 15 12:39:32.806 [notice] Opened Socks listener connection (ready) on 127.0.0.1:9050
Aug 15 12:39:32.000 [notice] Parsing GEOIP IPv4 file /opt/homebrew/Cellar/tor/0.4.7.14/share/tor/geoip.
Aug 15 12:39:32.000 [notice] Parsing GEOIP IPv6 file /opt/homebrew/Cellar/tor/0.4.7.14/share/tor/geoip6.
Aug 15 12:39:32.000 [notice] Bootstrapped 0% (starting): Starting
Aug 15 12:39:33.000 [notice] Starting with guard context "defa

KeyboardInterrupt: 

In [None]:
# Reload the guard-relay table written by the measurement loop and report it.
# NOTE(review): `joblib` is not imported in the visible cells -- confirm.
guard_nodes = joblib.load(GUARD_NODES_FILE)
print("\nguard_nodes", guard_nodes)
# Print the number of entries; the label previously got the dict itself.
print("len(guard_nodes)", len(guard_nodes))