In [13]:
import time
import os
import re
import json
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver

from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [14]:
def write_to_json_file(data, file_path: str):
    """
    Writes a Python object (e.g., a dictionary or list) to a JSON file.

    This function serializes the data into a JSON string and writes
    it to the specified file, ensuring the file is properly closed
    and handling potential errors.

    Args:
        data: The Python object to be serialized and written. Can be a dict,
              list, or other serializable object.
        file_path (str): The path to the output JSON file.
    """
    try:
        # The 'with' statement ensures the file is automatically closed.
        # The 'w' mode opens the file for writing (and creates it if it doesn't exist).
        # We specify encoding='utf-8' for better compatibility with different characters.
        with open(file_path, 'w', encoding='utf-8') as json_file:
            # json.dump() serializes a Python object into a JSON formatted stream
            # and writes it to a file-like object (json_file in this case).
            # 'indent=4' makes the output JSON file human-readable with nice formatting.
            json.dump(data, json_file, indent=4)
        print(f"Successfully wrote data to '{file_path}'")
    except IOError as e:
        print(f"An I/O error occurred while writing the file: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

In [10]:
def read_all_lines(file_path: str) -> list:
    """
    Reads a text file and extracts every line, inserting them into a list.

    Args:
        file_path (str): The path to the input text file.

    Returns:
        list: A list containing all the lines from the file. Returns an empty
              list if the file does not exist or an error occurs.
    """
    # Validate the input file path
    if not os.path.exists(file_path):
        print(f"Error: The file '{file_path}' was not found.")
        return []

    extracted_lines = []

    try:
        # Open the file in read mode
        with open(file_path, 'r', encoding='utf-8') as file:
            # Iterate through each line in the file
            for line in file:
                # Append the line, stripping any trailing newline characters
                extracted_lines.append(line.strip())
    except IOError as e:
        print(f"An I/O error occurred: {e}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return []

    return extracted_lines

In [12]:
wallet_ids = read_all_lines('./sample.txt')

# Request page for wallet info of current wallet id (this page uses javascript to pull data, 
# so we need to use selenium to allow page to wait for data fetching)
wallet_id_info_url = f"https://www.blockchain.com/explorer/addresses/btc/{wallet_id}"
    
service = Service(ChromeDriverManager().install())
print("ChromeDriver setup successful.")
            
# Initialize the Chrome browser with the service object
driver = webdriver.Chrome(service=service)
print("Chrome browser initialized.")

# list of dictionaries of sanctioned wallet and transacting wallets
transactions = []

# Get all transactions for current wallet id (only first for now)
# wallet_id = wallet_ids[0]
for wallet_id in wallet_ids:
        
    print(f"Checking outgoing transactions for wallet {wallet_id}")
    
    # driver = webdriver.Chrome()
    driver.get(wallet_id_info_url)
    
    # Wait for section component containing data to appear
    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.TAG_NAME, "section")));
    
    # All data is found inside section component, only search in that subtree
    section = driver.find_element(By.TAG_NAME, "section")
    
    all_anchors = section.find_elements(By.TAG_NAME, "a")
    
    outgoing_wallet_ids = []
    for a in all_anchors:
        # Only match anchor tags with text content that matches the bitcoin wallet id regex and is not our current wallet id
        if a.text != "" and re.search(r"(bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}", a.text) and a.text != wallet_id:
            outgoing_wallet_ids.append(a.text)
    
    print(f"Retrieved {len(outgoing_wallet_ids)} transacting wallets for {wallet_id}")

    record = {'sanctioned_wallet' : wallet_id, 'transacted_with' : outgoing_wallet_ids}
    transactions.append(record)
    
    # Sleep to prevent excessive requests to blockchain.com
    time.sleep(5)

driver.quit()

ChromeDriver setup successful.
Chrome browser initialized.
Checking outgoing transactions for wallet bc1qcp6fr7gtyukympl6unr7uv78h3vprycwj455zx
Retrieved 6 transacting wallets for bc1qcp6fr7gtyukympl6unr7uv78h3vprycwj455zx
Checking outgoing transactions for wallet 0xE950DC316b836e4EeFb8308bf32Bf7C72a1358FF
Retrieved 8 transacting wallets for 0xE950DC316b836e4EeFb8308bf32Bf7C72a1358FF
Checking outgoing transactions for wallet 1Ge8JodC2HiBiEuT7D3MoH6Fak6XrcT9Kf
Retrieved 8 transacting wallets for 1Ge8JodC2HiBiEuT7D3MoH6Fak6XrcT9Kf
Checking outgoing transactions for wallet 1FjubFHV4mpYjBmvjsEhZssyiiA4TNmnm2
Retrieved 8 transacting wallets for 1FjubFHV4mpYjBmvjsEhZssyiiA4TNmnm2
Checking outgoing transactions for wallet 3H3rh85qPaGLy2w6618yZNaH7i8asHv46B
Retrieved 8 transacting wallets for 3H3rh85qPaGLy2w6618yZNaH7i8asHv46B
Checking outgoing transactions for wallet 0x4f47bc496083c727c5fbe3ce9cdf2b0f6496270c
Retrieved 8 transacting wallets for 0x4f47bc496083c727c5fbe3ce9cdf2b0f6496270c
Check

In [15]:
write_to_json_file(transactions, './data/transactions.json')

Successfully wrote data to './data/transactions.json'
