In [33]:
import requests
import json
import pandas as pd
import os

def generate_query(first, skip):
  query = """
    query {
      ranking(first: %s, skip: %s) {
        items {
          id
          name
          twitter
          proposalsCount
          votesCount
          followersCount
          website
        }
      }
    }
  """ % (first, skip)
  return query


out_dir = 'cached_snapshot_data_2023_07_20'

def get_all_deployments():
    URL = "https://hub.snapshot.org/graphql"


    has_next_page = True
    skip = 0
    while has_next_page:
        # if already cached, skip
        if os.path.exists(f'{out_dir}/snapshot_organizations_{skip}.json'):
            skip += 20
            # print('skipping', skip)
            continue
        query = generate_query(20, skip)
        print('.', skip)
        r = requests.post(URL, json={'query': query})
        data = json.loads(r.text)

        print(data)
        
        organizations = data['data']['ranking']['items']

        if not organizations:
            print('No organizations found')
            if data.get('errors'):
                print(data['errors'])
                if 'The `skip` argument must not be greater than 15000' in data['errors'][0]['message']:
                    break

        # save to out_dir
        with open(f'{out_dir}/snapshot_organizations_{skip}.json', 'w') as f:
            json.dump(organizations, f)
        
        print('  ', len(organizations))
        if len(organizations) < 20:
            has_next_page = False
        skip += 20


df = get_all_deployments()

. 20840
{'data': {'ranking': {'items': None}}}
No organizations found


TypeError: object of type 'NoneType' has no len()

In [34]:
# load all cached data into a df
files = os.listdir(out_dir)
files = [f for f in files if 'snapshot_organizations' in f]
dfs = []
for f in files:
    with open(f'{out_dir}/{f}', 'r') as f:
        data = json.load(f)
    dfs.append(pd.DataFrame(data))
# reset index
df = pd.concat(dfs)
df

Unnamed: 0,id,name,twitter,proposalsCount,votesCount,followersCount,website
0,sybilizer.eth,DAO,,0,0,1,
1,sybuild.eth,vote,,1,1,0,
2,sydtek.eth,SydTek,,0,0,1,
3,sylvial.eth,dsfsd,,1,1,0,
4,sysin.eth,Testing,,1,1,0,
...,...,...,...,...,...,...,...
15,caitlyn1.eth,l,,0,0,0,
16,caiyuangungun.eth,BBQ,Sxcer2,8,18,7,
17,caiyubo888.eth,yuezige,,12,23,10,
18,calebrezzy.eth,crust DAO,,0,0,0,


In [35]:
# how many have less than 10 proposals?
(df['proposalsCount'] < 10).value_counts()
# preview of events to come...

True     18194
False     2588
Name: proposalsCount, dtype: int64

In [36]:
# combine twitter and website into an 'additional' column, ignore NaNs
df['additional'] = df[['twitter', 'website']].apply(lambda x: ', '.join(x.dropna()), axis=1)
# drop twitter and website columns
df.drop(['twitter', 'website'], axis=1, inplace=True)
df

Unnamed: 0,id,name,proposalsCount,votesCount,followersCount,additional
0,sybilizer.eth,DAO,0,0,1,
1,sybuild.eth,vote,1,1,0,
2,sydtek.eth,SydTek,0,0,1,
3,sylvial.eth,dsfsd,1,1,0,
4,sysin.eth,Testing,1,1,0,
...,...,...,...,...,...,...
15,caitlyn1.eth,l,0,0,0,
16,caiyuangungun.eth,BBQ,8,18,7,Sxcer2
17,caiyubo888.eth,yuezige,12,23,10,
18,calebrezzy.eth,crust DAO,0,0,0,


In [37]:
# add platform = 'snapshot'
df['platform'] = 'snapshot'
# rename id to platform_id
df.rename(columns={'id': 'platform_id'}, inplace=True)
df

Unnamed: 0,platform_id,name,proposalsCount,votesCount,followersCount,additional,platform
0,sybilizer.eth,DAO,0,0,1,,snapshot
1,sybuild.eth,vote,1,1,0,,snapshot
2,sydtek.eth,SydTek,0,0,1,,snapshot
3,sylvial.eth,dsfsd,1,1,0,,snapshot
4,sysin.eth,Testing,1,1,0,,snapshot
...,...,...,...,...,...,...,...
15,caitlyn1.eth,l,0,0,0,,snapshot
16,caiyuangungun.eth,BBQ,8,18,7,Sxcer2,snapshot
17,caiyubo888.eth,yuezige,12,23,10,,snapshot
18,calebrezzy.eth,crust DAO,0,0,0,,snapshot


In [38]:
# add website column https://snapshot.org/#/ + platform_id
df['website'] = 'https://snapshot.org/#/' + df['platform_id']
df

Unnamed: 0,platform_id,name,proposalsCount,votesCount,followersCount,additional,platform,website
0,sybilizer.eth,DAO,0,0,1,,snapshot,https://snapshot.org/#/sybilizer.eth
1,sybuild.eth,vote,1,1,0,,snapshot,https://snapshot.org/#/sybuild.eth
2,sydtek.eth,SydTek,0,0,1,,snapshot,https://snapshot.org/#/sydtek.eth
3,sylvial.eth,dsfsd,1,1,0,,snapshot,https://snapshot.org/#/sylvial.eth
4,sysin.eth,Testing,1,1,0,,snapshot,https://snapshot.org/#/sysin.eth
...,...,...,...,...,...,...,...,...
15,caitlyn1.eth,l,0,0,0,,snapshot,https://snapshot.org/#/caitlyn1.eth
16,caiyuangungun.eth,BBQ,8,18,7,Sxcer2,snapshot,https://snapshot.org/#/caiyuangungun.eth
17,caiyubo888.eth,yuezige,12,23,10,,snapshot,https://snapshot.org/#/caiyubo888.eth
18,calebrezzy.eth,crust DAO,0,0,0,,snapshot,https://snapshot.org/#/calebrezzy.eth


In [39]:
# rename votesCount to votes_count and proposalsCount to proposals_count
df.rename(columns={'votesCount': 'votes_count', 'proposalsCount': 'proposals_count'}, inplace=True)
# reorder columns
df = df[['platform', 'platform_id', 'name', 'additional', 'website', 'votes_count', 'proposals_count']]
df

Unnamed: 0,platform,platform_id,name,additional,website,votes_count,proposals_count
0,snapshot,sybilizer.eth,DAO,,https://snapshot.org/#/sybilizer.eth,0,0
1,snapshot,sybuild.eth,vote,,https://snapshot.org/#/sybuild.eth,1,1
2,snapshot,sydtek.eth,SydTek,,https://snapshot.org/#/sydtek.eth,0,0
3,snapshot,sylvial.eth,dsfsd,,https://snapshot.org/#/sylvial.eth,1,1
4,snapshot,sysin.eth,Testing,,https://snapshot.org/#/sysin.eth,1,1
...,...,...,...,...,...,...,...
15,snapshot,caitlyn1.eth,l,,https://snapshot.org/#/caitlyn1.eth,0,0
16,snapshot,caiyuangungun.eth,BBQ,Sxcer2,https://snapshot.org/#/caiyuangungun.eth,18,8
17,snapshot,caiyubo888.eth,yuezige,,https://snapshot.org/#/caiyubo888.eth,23,12
18,snapshot,calebrezzy.eth,crust DAO,,https://snapshot.org/#/calebrezzy.eth,0,0


In [40]:
# save to csv
df.to_csv('snapshot_deployments.csv', index=False)