In [27]:
import requests
import pandas as pd
import json
import os
import re

from dotenv import load_dotenv
from neo4j import GraphDatabase


load_dotenv()


NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_PASS = os.getenv('NEO4J_PASS')
neo4j_client = GraphDatabase.driver(NEO4J_URI, auth=('neo4j', NEO4J_PASS))
retrieval = "InitialIngest"

def execute_query(query, params=None, neo4j=neo4j_client):
    with neo4j_client.session() as session:
        result = session.run(query, parameters=params)
        # Convert the result to a DataFrame
        records = [record.data() for record in result]
        return pd.DataFrame(records)


In [14]:
##
def call_snapshot_api(query, snapshot_url="https://hub.snapshot.org/graphql", counter=0):
    if counter > 10:
        raise Exception("Max retries exceeded.")
    response = requests.post(snapshot_url, json={"query": query})
    if response.status_code == 504:
        # Handle Gateway Timeout
        return make_api_call(query, snapshot_url, counter=counter + 1)
    data = response.json()
    if "errors" in data:
        raise Exception("Error in API response: {}".format(data["errors"]))
    return data["data"]


def clean_text(text):
    text = re.sub(r"[“”]", '"', text)  # Replace smart double quotes
    text = re.sub(r"[‘’]", "'", text)  # Replace smart single quotes
    text = re.sub(r"[—–]", "-", text)
    text = re.sub(r"[^\x00-\x7F]+", "", text)
    
    return text

In [15]:
arb_space_id = "arbitrumfoundation.eth"  # Replace with the actual space ID

spaces_query = f"""
{{
    space(id: "{arb_space_id}") {{
        id
        name
        about
        avatar
        website
        twitter
        github
        network
        symbol
        strategies {{
            name
            params
        }}
        admins
        members
    }}
}}
""" 

proposals_query = f"""
{{
    proposals(
        where: {{
            space_in: ["{arb_space_id}"]
        }},
        first: 1000
    ) {{
        id
        title
        body
        choices
        start
        end
        state
        author
    }}
}}
""" 

votes_query = f"""
{{
    votes(
        where: {{
            proposal: "{arb_space_id}"
        }},
        first: 10
    ) {{
        id
        voter
        choice
    }}
}}
"""

In [10]:
space_data = call_snapshot_api(spaces_query)
print(json.dumps(space_data, indent=4))


{
    "space": {
        "id": "arbitrumfoundation.eth",
        "name": "Arbitrum DAO",
        "about": "The official snapshot space for the Arbitrum DAO",
        "avatar": "ipfs://bafkreiejp7kjzjm4mlck45xcfup5lha7wrgqlyghvrtvpjpu5sfb6rl2ei",
        "website": "https://arbitrum.foundation",
        "twitter": "arbitrum",
        "github": null,
        "network": "42161",
        "symbol": "ARB",
        "strategies": [
            {
                "name": "erc20-votes",
                "params": {
                    "symbol": "ARB",
                    "address": "0x912CE59144191C1204E64559FE8253a0e49E6548",
                    "decimals": 18
                }
            }
        ],
        "admins": [],
        "members": []
    }
}


In [None]:
## create snapshot space

params = {
    'spaceId': space_data['space']['id'],
    'name': space_data['space']['name'],
    'website': space_data['space']['website'],
    'twitter': space_data['space']['twitter'],
    'strategyToken': space_data['space']['strategies'][0]['params']['address']
}

space_query = """
    match (entity:Entity {name: "Arbitrum Foundation"}) 
    with entity
    merge (space:Snapshot:Space {spaceId: $spaceId}) 
    set space.name = $name
    set space.website = $website
    set space.twitter = $twitter 
    set space.strategyToken = $strategyToken
    with entity, space 
    merge (entity)-[r:ACCOUNT]->(space)
    return entity, r, space
"""

execute_query(space_query, params)

In [None]:
# dump

In [16]:
proposals_data = call_snapshot_api(proposals_query)

proposals_data['proposals'][0].keys()

dict_keys(['id', 'title', 'body', 'choices', 'start', 'end', 'state', 'author'])

In [17]:
proposals_data_df = pd.DataFrame(proposals_data['proposals'])

In [None]:
proposals_data_df.to_csv("snapshot-scraped-data/arb-snapshot-proposals-20240303.csv")

In [11]:
counter = 0 
total = len(proposals_data['proposals'])

## create proposals
for i in proposals_data['proposals']:
    counter += 1
    print(f"Ingesting proposal {str(counter)} out of {str(total)}...")
    
    name = clean_text(i['title'])
    text = clean_text(i['body'])
    authorAddress = i['author'].lower()
    props_params = {
        'id': i['id'],
        'name': i['title'], 
        'startDt': i['start'],
        'endDt': i['end'],
        'choices': i['choices'], 
        'name': name, 
        'text': text,
        'authorAddress': authorAddress
    }
    props_query = """
    merge (prop:Snapshot:Proposal {id: $id})
    set prop.name = $name
    set prop.text = $text
    set prop.startDt = $startDt
    set prop.endDt = $endDt
    set prop.choices = $choices
    """
    execute_query(props_query, props_params)

NameError: name 'proposals_data' is not defined

In [None]:
### connect

execute_query("""match (prop:Proposal) match (space:Space)
with prop, space
merge (space)-[r:PROPOSAL]->(proposal)""")



In [None]:
## create and link authors
counter = 0 
total = len(proposals_data['proposals'])

## create proposals
for i in proposals_data['proposals']:
    counter += 1
    print(f"Ingesting record {str(counter)} out of {str(total)}...")
    
    authorAddress = i['author'].lower()

    walletParams = {
        'authorAddress' : authorAddress,
        'proposalId': i['id']
    }

    walletsQuery = """
    merge (wallet:Wallet {address: $authorAddress}) 
    with wallet 
    match (proposal:Proposal:Snapshot {id: $proposalId}) 
    with wallet, proposal 
    merge (wallet)-[r:AUTHOR]->(proposal)
    """
    execute_query(walletsQuery, walletParams)

In [None]:
## cool voters

In [18]:
def get_votes_for_proposals(proposals_response):
    votes_query_template = """
    {{
        votes (
            first: 1000,
            where: {{
                proposal_in: {proposal_ids}
            }}
        ) {{
            id
            voter
            choice
            proposal {{
                id
            }}
        }}
    }}
    """
    # Assuming proposals_response is the full response from the proposals query
    # and contains a list of proposals under ['data']['proposals']
    proposal_ids = [proposal["id"] for proposal in proposals_response['proposals']]
    votes = []
    for proposal_id in proposal_ids:
        query = votes_query_template.format(proposal_ids=json.dumps([proposal_id]))
        response = requests.post("https://hub.snapshot.org/graphql", json={"query": query})
        if response.status_code == 200:
            data = response.json()["data"]["votes"]
            votes.extend(data)
    return votes

proposal_votes = get_votes_for_proposals(proposals_data)

In [19]:
proposal_votes_df = pd.DataFrame(proposal_votes)


In [20]:
type(proposal_votes_df.iloc[0]['proposal'])
proposal_votes_df['proposalId'] = proposal_votes_df['proposal'].apply(lambda x: x['id'])


In [33]:
proposal_votes_df_upload = proposal_votes_df[['voter', 'choice', 'proposalId']]
proposal_votes_df_upload.iloc[51].proposalId

'0x24344ab10eb905a4d7fa5885c6f681290e765a08a5f558ff6cfc5fedab42afb6'

In [25]:
proposal_votes_df_upload.to_csv("snapshot-scraped-data/arb-snapshot-votes.csv")

In [22]:
proposal_votes_df.head(3)

NameError: name 'proposal_votes_df' is not defined

In [None]:
len(proposal_votes)

In [None]:
proposal_votes[0]

In [28]:
## create wallets
createWallets = """
CALL {
  LOAD CSV WITH HEADERS FROM 
  'https://github.com/jchanolm/arbitrum-data/blob/main/notebooks/arb-governance-data/snapshot/snapshot-scraped-data/arb-snapshot-votes.csv' AS row
  MERGE (wallet:Wallet {address: row.voter})-[r:VOTED]->(prop)
  SET r.choice = row.choice
}
IN TRANSACTIONS OF 500 ROWS;
"""

execute_query(createWallets)

DatabaseError: {code: Neo.DatabaseError.Statement.ExecutionFailed} {message: At https://github.com/jchanolm/arbitrum-data/blob/main/notebooks/arb-governance-data/snapshot/snapshot-scraped-data/arb-snapshot-votes.csv @ position 51 -  there's a field starting with a quote and whereas it ends that quote there seems to be characters in that field after that ending quote. That isn't supported. This is what I read: 'fileTree":'}

In [9]:
## connect wallets to proposals
connectWallets = """
CALL {
  LOAD CSV WITH HEADERS FROM 
  'https://github.com/jchanolm/arbitrum-data/blob/main/notebooks/arb-governance-data/snapshot/snapshot-scraped-data/arb-snapshot-votes.csv' AS row
  match (wallet:Wallet {address: row.voter})
  match (prop:Proposal {id: row.id})
  with wallet, prop, row 
  merge (wallet)-[r:VOTED]->(prop) 
  set r.choice = row.choice
}
IN TRANSACTIONS OF 500 ROWS;
"""

execute_query(connectWallets)

DatabaseError: {code: Neo.DatabaseError.Statement.ExecutionFailed} {message: At https://github.com/jchanolm/arbitrum-data/blob/main/notebooks/arb-governance-data/snapshot/snapshot-scraped-data/arb-snapshot-votes.csv @ position 51 -  there's a field starting with a quote and whereas it ends that quote there seems to be characters in that field after that ending quote. That isn't supported. This is what I read: 'fileTree":'}

In [None]:
## connect snapshot urls from grant subs

match (prop:Snapshot:Proposal)
match (grantee:Grantee)-[r:GRANTEE]-(grant:GrantInitiative)
where r.grantApprovalAction contains prop.id 
with prop, grantee, grant
merge (grant)-[r:PROPROSAL]->(prop)
merge (prop)-[r1:APPROVED_FUNDING]->(grantee)

In [None]:
proposal_votes_df = pd.DataFrame(proposal_votes)

In [None]:
proposal_votes_df.to_csv('snapshot-scraped-data/arb-snapshot-votes.csv')