In [1]:
import warnings
from urllib3.exceptions import NotOpenSSLWarning
import os
import re
import pandas as pd
import time
import requests
import networkx as nx
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html, Input, Output




In [2]:

warnings.simplefilter('ignore', NotOpenSSLWarning)

# Function to identify Ethereum addresses
def is_ethereum_address(address):
    if pd.isna(address) or not isinstance(address, str):
        return False
    return re.match(r'^0x[a-fA-F0-9]{40}$', address) is not None

# Load the combined wallets data
combined_wallets_data_path = "/Users/vishalsehgal/Documents/Masters/Data Forensics/DFProject-1/crawler/crawler/analysis/wallet_data/combined_wallets_data.csv"
print(f"Loading data from {combined_wallets_data_path}")
data = pd.read_csv(combined_wallets_data_path)
print(f"Data loaded. Shape: {data.shape}")


Loading data from /Users/vishalsehgal/Documents/Masters/Data Forensics/DFProject-1/crawler/crawler/analysis/wallet_data/combined_wallets_data.csv
Data loaded. Shape: (19, 29)


In [3]:

# Check each column for Ethereum addresses
eth_addresses = {}

for column in data.columns[1:]:  # Skip the first column which is 'url'
    eth_addresses[column] = data[column].apply(is_ethereum_address)

# Summarize the results
eth_summary = pd.DataFrame(eth_addresses).sum()
eth_summary = eth_summary[eth_summary > 0]
print(f"Ethereum address columns identified: {list(eth_summary.index)}")

# Extract unique Ethereum addresses
eth_columns = eth_summary.index.tolist()
eth_data = data[eth_columns]

# Melt the dataframe to have a long format for easier processing
eth_addresses_long = eth_data.melt(var_name='address_type', value_name='address')
eth_addresses_long = eth_addresses_long.dropna(subset=['address'])

# Filter for unique addresses
unique_eth_addresses = eth_addresses_long['address'].unique()
print(f"Unique Ethereum addresses found: {len(unique_eth_addresses)}")


Ethereum address columns identified: ['BEP20', 'KCC', 'ERC20', 'AVAX', 'MATIC', 'Arbitrum', 'Network', 'One', 'zkSync', 'Linea', 'OP', 'Matic']
Unique Ethereum addresses found: 10


In [4]:

# Function to fetch transactions for multiple addresses
def fetch_transactions(address, api_key):
    url = f"https://api.etherscan.io/api?module=account&action=txlist&address={address}&startblock=0&endblock=99999999&sort=asc&apikey={api_key}"
    response = requests.get(url)
    data = response.json()
    if data['status'] == '1':
        return data['result']
    else:
        return []

def fetch_all_transactions(addresses, api_key):
    all_transactions = []
    for address in addresses:
        transactions = fetch_transactions(address, api_key)
        for tx in transactions:
            tx['address_type'] = address
        all_transactions.extend(transactions)
        time.sleep(0.2)  # To respect API rate limits
    return all_transactions


In [5]:

# Fetch transactions for identified Ethereum addresses
api_key = 'DMY3T9QQP87HIWS29EAFWMW6RVYPZYIHRG'
print("Fetching transactions...")
all_transactions = fetch_all_transactions(unique_eth_addresses, api_key)  # Fetch all transactions
print(f"Transactions fetched. Total transactions: {len(all_transactions)}")

# Convert to DataFrame
transactions_df = pd.DataFrame(all_transactions)
print(f"Transactions DataFrame created. Shape: {transactions_df.shape}")


Fetching transactions...
Transactions fetched. Total transactions: 1151
Transactions DataFrame created. Shape: (1151, 21)


In [6]:

# Process transactions into edges and count transactions
def process_transactions(transactions):
    edges = []
    tx_counts = {}
    tx_amounts = {}
    for tx in transactions:
        sender = tx['from']
        receiver = tx['to']
        value = int(tx['value']) / 10**18  # Convert Wei to Ether
        if value >= 0.1:  # Only process transactions with value greater than or equal to 0.1 ETH
            edges.append((sender, receiver, value, tx['hash'], tx['blockNumber'], tx['timeStamp']))
            if sender not in tx_counts:
                tx_counts[sender] = 0
                tx_amounts[sender] = 0
            tx_counts[sender] += 1
            tx_amounts[sender] += value
    return edges, tx_counts, tx_amounts

edges, tx_counts, tx_amounts = process_transactions(all_transactions)  # Process all transactions
print(f"Edges processed. Total edges: {len(edges)}")


Edges processed. Total edges: 98


In [7]:

# Filter out wallets with total amount < 0.1 ETH
filtered_tx_counts = {k: v for k, v in tx_counts.items() if tx_amounts[k] >= 0.1}
filtered_tx_amounts = {k: v for k, v in tx_amounts.items() if v >= 0.1}

# Sort transaction amounts in descending order
sorted_tx_amounts = dict(sorted(filtered_tx_amounts.items(), key=lambda item: item[1], reverse=True))

# Print transaction counts and total amounts
print("Transaction counts and total amounts for each wallet (sorted by total amount):")
for address in sorted_tx_amounts:
    print(f"Address: {address}, Transactions: {filtered_tx_counts[address]}, Total Amount: {sorted_tx_amounts[address]:.2f} ETH")


Transaction counts and total amounts for each wallet (sorted by total amount):
Address: 0xfe69c92ad05ce854029af0a26022d360830381cb, Transactions: 54, Total Amount: 19.25 ETH
Address: 0xd24400ae8bfebb18ca49be86258a3c749cf46853, Transactions: 3, Total Amount: 4.25 ETH
Address: 0x6e14ebeb24e4a3cb0d39f7b88e39ff435e7d8760, Transactions: 1, Total Amount: 2.25 ETH
Address: 0x0c69ac239f21bb6380ba6a07120af51cc26c9d88, Transactions: 3, Total Amount: 2.21 ETH
Address: 0x4bb8cdb74cb3e97ea70520e92cbd46a9594837f2, Transactions: 1, Total Amount: 1.71 ETH
Address: 0xc5c61a1ea535c5b783a81dea6508dbfe262ad6b6, Transactions: 1, Total Amount: 1.65 ETH
Address: 0xddfabcdc4d8ffc6d5beaf154f18b778f892a0740, Transactions: 1, Total Amount: 0.82 ETH
Address: 0xe76aa51e9d73dfd715c202de8fd21f3431a65216, Transactions: 1, Total Amount: 0.67 ETH
Address: 0x9bd633da262128b96ca876185239c368fe0e9125, Transactions: 1, Total Amount: 0.63 ETH
Address: 0x572bb57c80672c761bed563a1771f8fb380a317b, Transactions: 1, Total Amount

In [8]:

# Create a NetworkX graph
G = nx.DiGraph()

for edge in edges:
    if edge[0] in filtered_tx_counts and edge[1] in filtered_tx_counts:  # Only add edges where both sender and receiver have non-zero transactions
        G.add_edge(edge[0], edge[1], weight=edge[2], hash=edge[3], block=edge[4], time=edge[5])

# Calculate network metrics
degree_dict = dict(G.degree(G.nodes()))
betweenness_dict = nx.betweenness_centrality(G)
closeness_dict = nx.closeness_centrality(G)


In [9]:

# Generate positions for nodes
pos = nx.spring_layout(G, k=0.1)

# Create edge trace with annotations for arrows
edge_trace = []
annotations = []

for edge in G.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    weight = edge[2]['weight']
    edge_trace.append(
        go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            line=dict(width=2, color='#888'),
            hoverinfo='text',
            text=f'{edge[0]} -> {edge[1]}: {weight:.2f} ETH',
            mode='lines'
        )
    )
    annotations.append(
        dict(
            ax=x0, ay=y0, axref='x', ayref='y',
            x=x1, y=y1, xref='x', yref='y',
            showarrow=True, arrowhead=3, arrowsize=1, arrowwidth=1, arrowcolor='#888'
        )
    )

# Create node trace
node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers+text',
    textposition="top center",
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        color=[],
        size=20,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2, color='#333')
    )
)

# Add node positions and labels
for node in G.nodes():
    if node in filtered_tx_counts:  # Only add nodes that have transactions greater than or equal to 0.1 ETH
        x, y = pos[node]
        node_trace['x'] += (x,)
        node_trace['y'] += (y,)
        tx_count = filtered_tx_counts.get(node, 0)
        tx_amount = sorted_tx_amounts.get(node, 0)
        incoming_tx = sum([1 for u, v, d in G.in_edges(node, data=True) if d['weight'] >= 0.1])
        outgoing_tx = sum([1 for u, v, d in G.out_edges(node, data=True) if d['weight'] >= 0.1])
        node_trace['text'] += (f'{node}\nTransactions: {tx_count}\nTotal Amount: {tx_amount:.2f} ETH\n'
                               f'Incoming: {incoming_tx}\nOutgoing: {outgoing_tx}',)
        # Node color represents the number of connections (degree)
        node_trace['marker']['color'] += (len(list(G.neighbors(node))),)

# Create time series data
transactions_df['timeStamp'] = pd.to_datetime(transactions_df['timeStamp'], unit='s')
transactions_df = transactions_df[transactions_df['value'].astype(float) >= 0.1 * 10**18]  # Filter transactions >= 0.1 ETH
time_series = transactions_df.groupby(transactions_df['timeStamp'].dt.date).size().reset_index(name='counts')

# Create time series trace
time_series_fig = px.line(time_series, x='timeStamp', y='counts', title='Transactions Over Time')

# Create the Plotly figure
fig = go.Figure(
    data=edge_trace + [node_trace],
    layout=go.Layout(
        title='<br>Ethereum Network Graph',
        titlefont=dict(size=20),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        annotations=annotations,
        xaxis=dict(showgrid=False, zeroline=False),
        yaxis=dict(showgrid=False, zeroline=False),
        clickmode='event+select'
    )
)


  transactions_df['timeStamp'] = pd.to_datetime(transactions_df['timeStamp'], unit='s')


In [10]:

# Set up Dash app
app = Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='network-graph', figure=fig),
    dcc.Graph(id='time-series', figure=time_series_fig),
    html.Div(id='node-info', style={'whiteSpace': 'pre-line', 'marginTop': 20})
])

@app.callback(
    Output('network-graph', 'figure'),
    Output('node-info', 'children'),
    [Input('network-graph', 'clickData')]
)
def display_node_info(clickData):
    if clickData is None:
        return fig, "Click on a node to see its transaction details."
    
    if 'points' not in clickData or len(clickData['points']) == 0 or 'text' not in clickData['points'][0]:
        return fig, "Click on a node to see its transaction details."
    
    node_id = clickData['points'][0]['text'].split('\n')[0]
    connected_nodes = list(G.successors(node_id)) + list(G.predecessors(node_id))
    connected_info = [f"{node} (Transactions: {filtered_tx_counts.get(node, 0)}, Total Amount: {filtered_tx_amounts.get(node, 0):.2f} ETH)" for node in connected_nodes]
    
    node_transactions = [
        f"Hash: {G.edges[edge]['hash']}, Value: {G.edges[edge]['weight']:.2f} ETH, Block: {G.edges[edge]['block']}, Timestamp: {G.edges[edge]['time']}"
        for edge in G.edges(node_id)
    ]
    
    degree = degree_dict.get(node_id, 0)
    betweenness = betweenness_dict.get(node_id, 0)
    closeness = closeness_dict.get(node_id, 0)
    
    node_info = (f"Transactions for {node_id}:\n" + "\n".join(node_transactions) +
            f"\n\nConnected nodes:\n" + "\n".join(connected_info) +
            f"\n\nNetwork metrics:\nDegree: {degree}\nBetweenness: {betweenness:.4f}\nCloseness: {closeness:.4f}")

    # Highlight edges connected to the clicked node
    highlight_edge_trace = []
    highlight_annotations = []
    for edge in G.edges(data=True):
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        weight = edge[2]['weight']
        color = '#ff0000' if edge[0] == node_id or edge[1] == node_id else '#888'
        width = 4 if edge[0] == node_id or edge[1] == node_id else 2
        highlight_edge_trace.append(
            go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                line=dict(width=width, color=color),
                hoverinfo='text',
                text=f'{edge[0]} -> {edge[1]}: {weight:.2f} ETH',
                mode='lines'
            )
        )
        if edge[0] == node_id or edge[1] == node_id:
            highlight_annotations.append(
                dict(
                    ax=x0, ay=y0, axref='x', ayref='y',
                    x=x1, y=y1, xref='x', yref='y',
                    showarrow=True, arrowhead=3, arrowsize=1, arrowwidth=1, arrowcolor=color
                )
            )

    highlight_fig = go.Figure(
        data=highlight_edge_trace + [node_trace],
        layout=go.Layout(
            title='<br>Ethereum Network Graph',
            titlefont=dict(size=20),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=highlight_annotations,
            xaxis=dict(showgrid=False, zeroline=False),
            yaxis=dict(showgrid=False, zeroline=False),
            clickmode='event+select'
        )
    )

    return highlight_fig, node_info


In [11]:

if __name__ == '__main__':
    app.run_server(debug=True)
