In [65]:
import requests
import pandas as pd
import json
import os
import re

from dotenv import load_dotenv
from neo4j import GraphDatabase


# Pull connection settings from a local .env file (if any) into the process environment.
load_dotenv()


NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_PASS = os.getenv('NEO4J_PASS')
# Fail fast with a readable message instead of handing None to the driver.
if NEO4J_URI is None or NEO4J_PASS is None:
    raise RuntimeError(
        "NEO4J_URI and NEO4J_PASS must be set in the environment or in a .env file."
    )
# Shared driver used as the default connection by execute_query; the user name is fixed to 'neo4j'.
neo4j_client = GraphDatabase.driver(NEO4J_URI, auth=('neo4j', NEO4J_PASS))
# Label for this ingest run; not referenced by any of the visible cells.
retrieval = "InitialIngest"

def execute_query(query, params=None, neo4j=neo4j_client):
    """Run a Cypher query and return its records as a DataFrame.

    Args:
        query: Cypher statement to execute.
        params: Optional dict of query parameters.
        neo4j: Driver used to open the session; defaults to the module-level
            ``neo4j_client``.

    Returns:
        pandas.DataFrame with one row per returned record (built from
        ``record.data()``); empty when the query returns nothing.
    """
    # Use the driver that was passed in; previously this argument was ignored
    # and the global client was always used.
    with neo4j.session() as session:
        result = session.run(query, parameters=params)
        # Materialise the records while the session is still open.
        records = [record.data() for record in result]
        return pd.DataFrame(records)


In [66]:
##
def call_snapshot_api(query, snapshot_url="https://hub.snapshot.org/graphql", counter=0):
    """POST a GraphQL query to the Snapshot hub and return its ``data`` payload.

    A 504 (Gateway Timeout) response is retried; any other response is parsed
    as JSON.

    Args:
        query: GraphQL query string.
        snapshot_url: GraphQL endpoint to post to.
        counter: Number of attempts already made. A value above 10 aborts
            before any request is sent.

    Returns:
        The value stored under ``"data"`` in the JSON response.

    Raises:
        Exception: when the retry budget is exhausted, or when the response
            body contains an ``"errors"`` entry.
    """
    # Retry in a loop; the previous recursive retry called an undefined name
    # (make_api_call) and crashed with NameError on the first 504.
    while counter <= 10:
        response = requests.post(snapshot_url, json={"query": query})
        if response.status_code == 504:
            # Gateway Timeout: try again with the attempt count bumped.
            counter += 1
            continue
        data = response.json()
        if "errors" in data:
            raise Exception("Error in API response: {}".format(data["errors"]))
        return data["data"]
    raise Exception("Max retries exceeded.")


def clean_text(text):
    """Return ``text`` with typographic punctuation made ASCII and other non-ASCII removed.

    Curly double/single quotes become straight quotes, em/en dashes become a
    hyphen, and every remaining run of non-ASCII characters is deleted.
    """
    # Applied in order: the final catch-all must run after the targeted swaps.
    substitutions = (
        (r"[“”]", '"'),
        (r"[‘’]", "'"),
        (r"[—–]", "-"),
        (r"[^\x00-\x7F]+", ""),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

In [67]:
# Snapshot space ID that every query below is scoped to.
arb_space_id = "arbitrumfoundation.eth"

# Metadata for the space itself (profile fields, voting strategies, admins, members).
spaces_query = f"""
{{
    space(id: "{arb_space_id}") {{
        id
        name
        about
        avatar
        website
        twitter
        github
        network
        symbol
        strategies {{
            name
            params
        }}
        admins
        members
    }}
}}
"""

# Up to 1000 proposals belonging to the space (a single page, no pagination).
proposals_query = f"""
{{
    proposals(
        where: {{
            space_in: ["{arb_space_id}"]
        }},
        first: 1000
    ) {{
        id
        title
        body
        choices
        start
        end
        state
        author
    }}
}}
"""

# Small sample of votes cast in the space. The filter must be `space`:
# filtering `proposal` by the space ID can never match a proposal ID.
votes_query = f"""
{{
    votes(
        where: {{
            space: "{arb_space_id}"
        }},
        first: 10
    ) {{
        id
        voter
        choice
    }}
}}
"""

In [None]:
# Fetch the space metadata with the helper defined in this notebook
# (make_api_call is not defined anywhere, so this failed on a fresh kernel).
space_data = call_snapshot_api(spaces_query)
print(json.dumps(space_data, indent=4))


In [None]:
## create snapshot space

# Cypher parameters copied from the fetched space record (parameter name -> source field).
params = {
    param: space_data['space'][field]
    for param, field in (
        ('spaceId', 'id'),
        ('name', 'name'),
        ('website', 'website'),
        ('twitter', 'twitter'),
    )
}
# The token address is read from the params of the first listed voting strategy.
params['strategyToken'] = space_data['space']['strategies'][0]['params']['address']

# Upsert the Space node keyed by spaceId and attach it to the matched
# "Arbitrum Foundation" entity through an ACCOUNT relationship.
space_query = """
    match (entity:Entity {name: "Arbitrum Foundation"}) 
    with entity
    merge (space:Snapshot:Space {spaceId: $spaceId}) 
    set space.name = $name
    set space.website = $website
    set space.twitter = $twitter 
    set space.strategyToken = $strategyToken
    with entity, space 
    merge (entity)-[r:ACCOUNT]->(space)
    return entity, r, space
"""

execute_query(space_query, params)

In [None]:
# dump

In [68]:
# Fetch the proposals with the helper defined in this notebook
# (make_api_call is not defined anywhere, so this failed on a fresh kernel).
proposals_data = call_snapshot_api(proposals_query)

# Peek at the fields available on each proposal record.
proposals_data['proposals'][0].keys()

dict_keys(['id', 'title', 'body', 'choices', 'start', 'end', 'state', 'author'])

In [69]:
# Tabulate the fetched proposals: one row per proposal, one column per requested field.
proposals_data_df = pd.DataFrame(proposals_data['proposals'])

In [70]:
# to_csv does not create missing parent folders, so make sure the target directory exists.
os.makedirs("snapshot-scraped-data", exist_ok=True)
proposals_data_df.to_csv("snapshot-scraped-data/arb-snapshot-proposals-20240303.csv")

In [76]:
# Cypher that upserts one proposal node keyed by its Snapshot id. It is the
# same for every proposal, so it is built once rather than on each iteration.
props_query = """
    merge (prop:Snapshot:Proposal {id: $id})
    set prop.name = $name
    set prop.text = $text
    set prop.startDt = $startDt
    set prop.endDt = $endDt
    set prop.choices = $choices
    """

total = len(proposals_data['proposals'])

## create proposals
for counter, proposal in enumerate(proposals_data['proposals'], start=1):
    print(f"Ingesting proposal {counter} out of {total}...")

    props_params = {
        'id': proposal['id'],
        'startDt': proposal['start'],
        'endDt': proposal['end'],
        'choices': proposal['choices'],
        # Title and body are ASCII-normalised before being stored. The dict
        # used to list 'name' twice; only this cleaned value ever took effect.
        'name': clean_text(proposal['title']),
        'text': clean_text(proposal['body']),
        # Not referenced by props_query; kept so the parameter set is unchanged.
        'authorAddress': proposal['author'].lower(),
    }
    execute_query(props_query, props_params)

Ingesting proposal 1 out of 145...
Ingesting proposal 2 out of 145...
Ingesting proposal 3 out of 145...
Ingesting proposal 4 out of 145...
Ingesting proposal 5 out of 145...
Ingesting proposal 6 out of 145...
Ingesting proposal 7 out of 145...
Ingesting proposal 8 out of 145...
Ingesting proposal 9 out of 145...
Ingesting proposal 10 out of 145...
Ingesting proposal 11 out of 145...
Ingesting proposal 12 out of 145...
Ingesting proposal 13 out of 145...
Ingesting proposal 14 out of 145...
Ingesting proposal 15 out of 145...
Ingesting proposal 16 out of 145...
Ingesting proposal 17 out of 145...
Ingesting proposal 18 out of 145...
Ingesting proposal 19 out of 145...
Ingesting proposal 20 out of 145...
Ingesting proposal 21 out of 145...
Ingesting proposal 22 out of 145...
Ingesting proposal 23 out of 145...
Ingesting proposal 24 out of 145...
Ingesting proposal 25 out of 145...
Ingesting proposal 26 out of 145...
Ingesting proposal 27 out of 145...
Ingesting proposal 28 out of 145...
I

In [78]:
### connect

# Link the ingested proposals to their Snapshot space.
# - The MERGE must target `prop`, the matched proposal. The old query used an
#   unbound `proposal` variable, which made MERGE attach the space to a blank
#   node instead of the proposals.
# - Both matches are scoped (space by its id, proposals by the ids fetched
#   above) so nodes from other ingests are not cross-linked.
execute_query(
    """
    match (space:Snapshot:Space {spaceId: $spaceId})
    match (prop:Snapshot:Proposal)
    where prop.id in $proposalIds
    merge (space)-[r:PROPOSAL]->(prop)
    """,
    {
        'spaceId': space_data['space']['id'],
        'proposalIds': [proposal['id'] for proposal in proposals_data['proposals']],
    },
)



In [82]:
## create and link authors
total = len(proposals_data['proposals'])

# Upserts the author's wallet and links it to the proposal it authored.
# The statement never changes, so it is defined once ahead of the loop.
walletsQuery = """
    merge (wallet:Wallet {address: $authorAddress}) 
    with wallet 
    match (proposal:Proposal:Snapshot {id: $proposalId}) 
    with wallet, proposal 
    merge (wallet)-[r:AUTHOR]->(proposal)
    """

## link each proposal to its author's wallet
for counter, proposal in enumerate(proposals_data['proposals'], start=1):
    print(f"Ingesting record {counter} out of {total}...")

    walletParams = {
        # Addresses are lower-cased before being used as the wallet key.
        'authorAddress': proposal['author'].lower(),
        'proposalId': proposal['id'],
    }
    execute_query(walletsQuery, walletParams)

Ingesting record 1 out of 145...
Ingesting record 2 out of 145...
Ingesting record 3 out of 145...
Ingesting record 4 out of 145...
Ingesting record 5 out of 145...
Ingesting record 6 out of 145...
Ingesting record 7 out of 145...
Ingesting record 8 out of 145...
Ingesting record 9 out of 145...
Ingesting record 10 out of 145...
Ingesting record 11 out of 145...
Ingesting record 12 out of 145...
Ingesting record 13 out of 145...
Ingesting record 14 out of 145...
Ingesting record 15 out of 145...
Ingesting record 16 out of 145...
Ingesting record 17 out of 145...
Ingesting record 18 out of 145...
Ingesting record 19 out of 145...
Ingesting record 20 out of 145...
Ingesting record 21 out of 145...
Ingesting record 22 out of 145...
Ingesting record 23 out of 145...
Ingesting record 24 out of 145...
Ingesting record 25 out of 145...
Ingesting record 26 out of 145...
Ingesting record 27 out of 145...
Ingesting record 28 out of 145...
Ingesting record 29 out of 145...
Ingesting record 30 out

In [None]:
## cool voters

In [None]:
def get_votes_for_proposals(proposals_response, snapshot_url="https://hub.snapshot.org/graphql", page_size=1000):
    """Fetch votes for each proposal from the Snapshot GraphQL API.

    One request is sent per proposal, asking for at most ``page_size`` votes.
    The fetch is best-effort: a proposal whose request fails is skipped, but
    a warning is emitted so the gap is visible. A warning is also emitted
    when a proposal returns a full page, because its vote list has most
    likely been cut off at ``page_size``.

    Args:
        proposals_response: Mapping with a ``'proposals'`` list whose items
            each carry an ``'id'`` (the shape returned by the proposals query).
        snapshot_url: GraphQL endpoint to post to.
        page_size: Maximum number of votes requested per proposal.

    Returns:
        List of vote dicts (``id``, ``voter``, ``choice``, ``proposal``),
        concatenated in proposal order.
    """
    import warnings  # only this helper needs it

    votes_query_template = """
    {{
        votes (
            first: {page_size},
            where: {{
                proposal_in: {proposal_ids}
            }}
        ) {{
            id
            voter
            choice
            proposal {{
                id
            }}
        }}
    }}
    """
    votes = []
    for proposal in proposals_response['proposals']:
        proposal_id = proposal["id"]
        query = votes_query_template.format(
            page_size=page_size,
            # json.dumps renders the id list as a valid GraphQL string array.
            proposal_ids=json.dumps([proposal_id]),
        )
        response = requests.post(snapshot_url, json={"query": query})
        if response.status_code != 200:
            warnings.warn(
                f"Skipping votes for proposal {proposal_id}: HTTP {response.status_code}"
            )
            continue
        payload = response.json()
        if payload.get("errors"):
            warnings.warn(
                f"Skipping votes for proposal {proposal_id}: {payload['errors']}"
            )
            continue
        page = (payload.get("data") or {}).get("votes") or []
        if len(page) >= page_size:
            warnings.warn(
                f"Proposal {proposal_id} returned a full page of {len(page)} votes; "
                "results are probably truncated"
            )
        votes.extend(page)
    return votes

# Collect votes for every proposal fetched earlier (one API request per proposal).
proposal_votes = get_votes_for_proposals(proposals_data)

In [83]:
# Total number of vote records collected across all proposals.
len(proposal_votes)

145000

In [88]:
# Tabulate the collected vote records, one row per vote.
proposal_votes_df = pd.DataFrame(proposal_votes)

In [89]:
# to_csv does not create missing parent folders, so make sure the target directory exists.
os.makedirs('snapshot-scraped-data', exist_ok=True)
proposal_votes_df.to_csv('snapshot-scraped-data/arb-snapshot-votes.csv')