In [2]:
import keyring

import pymongo
import pymongoarrow.monkey
from pymongoarrow.api import Schema
import pyarrow as pa

from datetime import datetime

In [3]:
MDB_URI = keyring.get_password("system", "MONGO_CONNECTION_STRING")

In [4]:
# Add extra find_* methods to pymongo collection objects:
pymongoarrow.monkey.patch_all()

client = pymongo.MongoClient(MDB_URI)
database = client.get_database("transactions")
collection = database.get_collection("mempool")

In [10]:
collection.find_one()

{'_id': ObjectId('648ce10f18e0fbc3c3482f75'),
 'hash': '0x303936c1821da2e23c08fe2171947028ac394fe6d7eb5338d2643365a867b348',
 'type': 2,
 'accessList': [],
 'blockHash': None,
 'blockNumber': None,
 'transactionIndex': None,
 'confirmations': 0,
 'from': '0x4899951FFa90d493c58DC1db8760f15c8Ed2873A',
 'gasPrice': 30.998999649,
 'maxPriorityFeePerGas': {'_hex': '0xb2d05e00', '_isBigNumber': True},
 'maxFeePerGas': {'_hex': '0x0737af3261', '_isBigNumber': True},
 'gasLimit': 361022,
 'to': '0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D',
 'value': {'_hex': '0x7c585087238004', '_isBigNumber': True},
 'nonce': 5,
 'data': '0xb6f9de95000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000000000004899951ffa90d493c58dc1db8760f15c8ed2873a00000000000000000000000000000000000000000000000000000000648ce1850000000000000000000000000000000000000000000000000000000000000002000000000000000000000000c02aaa39b223fe8d0a0e5c4

In [18]:
schema = Schema({
    "_id": str,
    "timestamp": pa.timestamp("ms"),
    "gasPrice": float,
})

collection.find_pandas_all(
    {"gasPrice": {"$gt": 0}},
    schema=schema,
)

Unnamed: 0,_id,timestamp,gasPrice
0,,NaT,30.999000
1,,NaT,30.603247
2,,NaT,30.999000
3,,NaT,30.603247
4,,NaT,30.999000
...,...,...,...
64484,,NaT,8.576287
64485,,NaT,9.000000
64486,,NaT,10.000000
64487,,NaT,10.000000


In [24]:
from pymongoarrow.api import Schema
import pandas as pd

schema = Schema({
    "_id": str,
    "timestamp": float,
    "gasPrice": float,
})

df = collection.find_pandas_all({"gasPrice": {"$gt": 0}}, schema=schema)

# Convert the float timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')


In [25]:
df

Unnamed: 0,_id,timestamp,gasPrice
0,,2023-06-16 22:24:15.201999872,30.999000
1,,2023-06-16 22:24:15.191000064,30.603247
2,,2023-06-16 22:24:15.132999936,30.999000
3,,2023-06-16 22:24:16.993999872,30.603247
4,,2023-06-16 22:24:15.204999936,30.999000
...,...,...,...
64990,,2023-06-19 21:14:17.736999936,17.678750
64991,,2023-06-19 21:14:19.675000064,18.000000
64992,,2023-06-19 21:14:23.163000064,18.469492
64993,,2023-06-19 21:14:25.440999936,10.000000


In [26]:
cursor = collection.find({"gasPrice": {"$gt": 0}})
df = pd.DataFrame(list(cursor))
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')


In [32]:
df.value_counts('to') / len(df)

to
0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D    0.547872
0x3fC91A3afd70395Cd496C647d5a6CC9D4B2b7FAD    0.295441
0xEf1c6E67703c7BD7107eed8303Fbe6EC2554BF6B    0.081157
0xE592427A0AEce92De3Edee1F18E0157C05861564    0.049630
0x68b3465833fb72A70ecDF485E0e4C7bD8665Fc45    0.025901
Name: count, dtype: float64

In [34]:
address_to_contract_name = dict([  ["0xf164fC0Ec4E93095b804a4795bBe1e041497b92a", "UniswapV2Router01"],
  ["0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D", "UniswapV2Router02"],
  ["0xE592427A0AEce92De3Edee1F18E0157C05861564", "UniswapV3Router01"],
  ["0x68b3465833fb72A70ecDF485E0e4C7bD8665Fc45", "UniswapV3Router02"],
  ["0xEf1c6E67703c7BD7107eed8303Fbe6EC2554BF6B", "UniversalRouter01"],
  ["0x3fC91A3afd70395Cd496C647d5a6CC9D4B2b7FAD", "UniversalRouter02"],])


# Write the above dictionary to a json file so we can use it across the project
import json

with open('router_addresses.json', 'w') as fp:
    json.dump(address_to_contract_name, fp)


In [37]:
df.hash.nunique() / len(df)

0.81358938291236