In [1]:
from urllib import request
from bs4 import BeautifulSoup
from tqdm import tqdm
import json
import re
import requests
from hashlib import blake2b

### Scraper for Filecoin method names

New actors and methods appear on the Filecoin network constantly. This notebook scrapes the `builtin-actors` repo to get the latest list of actors and their methods. 

For methods whose number is defined by a hash of the method name rather than by a literal, it also computes the method number according to the [FRC42](https://github.com/helix-onchain/filecoin/tree/main/frc42_dispatch) specification.

In [2]:
# FRC42 method hashing
# from https://github.com/helix-onchain/filecoin/blob/main/frc42_dispatch/generate_hashes.py
def method_number_frc42(name):
    """Derive the FRC42 method number for a method name.

    The name is prefixed with ``1|``, ASCII-encoded and hashed with BLAKE2b
    using a 64-byte digest. The digest is then read as consecutive 4-byte
    big-endian integers, and the first one that is at least ``1 << 24`` is
    the method number.

    Args:
        name: Method name to hash; must be encodable as ASCII.

    Returns:
        The first 4-byte big-endian word of the digest that is >= ``1 << 24``.

    Raises:
        UnicodeEncodeError: If ``name`` contains non-ASCII characters.
        Exception: If no 4-byte word of the digest reaches ``1 << 24``.
    """
    full_digest = blake2b(('1|' + name).encode('ascii'), digest_size=64).digest()
    lowest_valid_number = 1 << 24

    # Walk the digest one 4-byte word at a time, front to back.
    for offset in range(0, len(full_digest), 4):
        candidate = int.from_bytes(full_digest[offset:offset + 4], byteorder='big')
        if candidate >= lowest_valid_number:
            return candidate

    raise Exception("Method ID could not be determined, please change it")


In [26]:
# 0. Pull the latest list of actor types from Github repo directory
# The response is used as a context manager so the connection is closed once
# the page has been read, and the timeout keeps the cell from hanging forever
# if the host does not answer.
with request.urlopen('https://github.com/filecoin-project/builtin-actors/tree/master/actors', timeout=30) as response:
    html = response.read()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns about it), so results could differ
# between environments.
soup = BeautifulSoup(html, 'html.parser')

# 1. Generate list of unique actors
# NOTE(review): this selector depends on the CSS classes of GitHub's directory
# listing page - confirm it still matches if the list comes back empty.
all_actor_categories_soup = soup.find_all("a", {'class': "js-navigation-open Link--primary"})

# Keep the link text of every match, in page order:
#   * links whose .string is None (no single text child) are dropped, because
#     they cannot name an actor directory;
#   * dict.fromkeys removes duplicates while preserving first-seen order.
all_actor_categories = list(dict.fromkeys(
    str(link.string)
    for link in all_actor_categories_soup
    if link.string is not None
))

# Filled in by the parsing cell: "<directory>/<actor name>" -> {method number: method name}
actor_methods = {}


In [27]:
# Download each actor's src/lib.rs and extract its method table into
# actor_methods["<directory>/<actor name>"] = {method number: method name}.
#
# Parsing is best-effort: an actor whose file cannot be downloaded or does not
# contain the expected definitions is skipped, but the reason is recorded in
# skipped_actors and printed at the end instead of being silently discarded.
# Only expected failures are caught, so Ctrl-C still stops the whole loop.
skipped_actors = {}

t = tqdm(all_actor_categories)
for current_actor in t:
    t.set_description('Parsing %s' % (current_actor))

    raw_actor_url = f'https://raw.githubusercontent.com/filecoin-project/builtin-actors/master/actors/{current_actor}/src/lib.rs'
    try:
        response = requests.get(raw_actor_url, timeout=30)
        # A missing lib.rs (HTTP 404) is reported as such rather than being
        # parsed as if the error page were Rust source.
        response.raise_for_status()
    except requests.RequestException as err:
        skipped_actors[current_actor] = f'download failed: {err}'
        continue
    raw_actor_text = response.text

    # Flatten the file to a single line and collect every "{...}" span that
    # contains no closing brace.
    parsed_code = re.findall(r"{([^}]*)}", raw_actor_text.replace("\n", ""))

    # The method enum is the first span that mentions METHOD_CONSTRUCTOR and
    # contains at least one assignment.
    parsed_code_filtered = [pc for pc in parsed_code if 'METHOD_CONSTRUCTOR' in pc and '=' in pc]
    if not parsed_code_filtered:
        skipped_actors[current_actor] = 'no method enum referencing METHOD_CONSTRUCTOR found'
        continue

    # Extract the actual actor name as stored in chain: the last quoted string
    # of the first span that mentions "static str", lower-cased.
    name_spans = [pc for pc in parsed_code if "static str" in pc]
    name_literals = re.findall(r'.*"([^"]+)"', name_spans[0]) if name_spans else []
    if not name_literals:
        skipped_actors[current_actor] = 'no quoted actor name next to "static str" found'
        continue
    raw_actor_name = name_literals[0].lower()

    # method 0 is always send, we automatically add this for every actor
    methods_for_actor = {0: 'Send'}

    for enum_entry in parsed_code_filtered[0].split(","):
        # Split "<name> = <value>" at the last "="; entries without "=" are
        # not method definitions.
        mnm = re.findall(r"(.*)=(.*)", enum_entry.strip())
        if not mnm:
            continue
        method_name = mnm[0][0].strip()
        method_value = mnm[0][1].strip()

        # Plain numeric method number. Because newlines were removed above, a
        # "//" comment that precedes the entry ends up inside method_name, so
        # the "//" test rejects commented-out entries.
        # NOTE(review): it also rejects a real entry preceded by a comment
        # line - confirm that is acceptable for the current sources.
        if method_value.isnumeric() and '//' not in method_name:
            try:
                methods_for_actor[int(method_value)] = method_name
            except ValueError:
                # isnumeric() accepts some characters int() cannot convert.
                pass
            continue

        if 'METHOD_CONSTRUCTOR' in method_value:
            methods_for_actor[1] = 'Constructor'

        # If a method hashing is specified, we parse that instead
        if 'frc42_dispatch' in method_value:
            quoted_names = re.findall(r'"([^"]*)"', method_value)
            if not quoted_names:
                continue
            method_name_parsed = quoted_names[0]
            try:
                methods_for_actor[method_number_frc42(method_name_parsed)] = method_name_parsed
            except Exception:
                # method_number_frc42 raises for names that are not ASCII or
                # whose digest yields no valid number; skip just this method.
                continue

    final_name = f'{current_actor}/{raw_actor_name}'
    actor_methods[final_name] = methods_for_actor

# Make skipped actors visible so a broken scrape is not mistaken for a
# complete one.
for skipped_name, skip_reason in skipped_actors.items():
    print(f'Skipped {skipped_name}: {skip_reason}')

Parsing verifreg: 100%|██████████| 16/16 [00:07<00:00,  2.08it/s]   


In [28]:
# Render the collected method tables as indented JSON. The integer method
# numbers used as dictionary keys are written out as strings, because JSON
# object keys are always strings.
actor_methods_json = json.dumps(actor_methods, indent=4)
print(actor_methods_json)

{
    "account/account": {
        "0": "Send",
        "1": "Constructor",
        "2": "PubkeyAddress",
        "2643134072": "AuthenticateMessage"
    },
    "cron/cron": {
        "0": "Send",
        "1": "Constructor",
        "2": "EpochTick"
    },
    "datacap/datacap": {
        "0": "Send",
        "1": "Constructor",
        "116935346": "Mint",
        "2624896501": "Destroy",
        "48890204": "Name",
        "2061153854": "Symbol",
        "3936767397": "Granularity",
        "114981429": "TotalSupply",
        "3261979605": "Balance",
        "80475954": "Transfer",
        "3621052141": "TransferFrom",
        "1777121560": "IncreaseAllowance",
        "1529376545": "DecreaseAllowance",
        "2765635761": "RevokeAllowance",
        "1434719642": "Burn",
        "2979674018": "BurnFrom",
        "4205072950": "Allowance"
    },
    "eam/evmaddressmanager": {
        "0": "Send",
        "1": "Constructor",
        "2": "Create",
        "3": "Create2",
        "4":