In [54]:
# import requests
import json
import pandas as pd
import os
import time
import datetime as dt
import requests_cache

In [55]:
out_dir = 'cached_snapshot_data_2023_07_20'
BATCH_SIZE = 1000 # Max first: size
backoff_seconds = 60 / 100 # 100 requests per minute
MAX_DATE: str = "2024-01-01T00:00:00Z"

In [56]:
MAX_TIMESTAMP: int = int(dt.datetime.fromisoformat(MAX_DATE).timestamp())
print(MAX_TIMESTAMP)

1704067200


In [57]:
def generate_query(first, created_gte):
  query = f"""
    query {{
      spaces(first: {first}, orderBy: "created", orderDirection: asc, where: {{ created_gte: {created_gte}, created_lt: {MAX_TIMESTAMP} }}) {{
        id
        name
        twitter
        proposalsCount
        votesCount
        followersCount
        website
        created
      }}
    }}
  """
  return query


session = requests_cache.CachedSession(
  '.cache',
  allowable_methods=['GET', 'POST'],
)

def get_all_deployments():
  URL = "https://hub.snapshot.org/graphql"
  all_orgs = []

  last_index = 0
  while last_index is not None:
      query = generate_query(BATCH_SIZE, last_index)
      r = session.post(URL, json={'query': query})
      if not r.from_cache:
        time.sleep(backoff_seconds)

      data = json.loads(r.text)

      print(f"last_index {dt.datetime.fromtimestamp(last_index)}, {'HIT' if r.from_cache else 'MISS'}")
      
      if 'errors' in data:
        print(data['errors'])
        if 'The `skip` argument must not be greater than 30000' in data['errors'][0]['message']:
            break
      
      organizations = data['data']['spaces']

      if len(organizations) < BATCH_SIZE:
          last_index = None
      else:
          last_index = organizations[-1]['created']

      if not organizations:
        print('No organizations found')
      else:
        while all_orgs and all_orgs[-1]['id'] == organizations[0]['id']:
          organizations.pop(0)
        all_orgs.extend(organizations)


  return pd.DataFrame(all_orgs)

df = get_all_deployments()

last_index 1970-01-01 01:00:00, HIT
last_index 2021-06-06 22:48:26, HIT
last_index 2021-10-10 08:47:58, HIT
last_index 2021-11-21 05:08:40, HIT
last_index 2021-12-08 05:35:36, HIT
last_index 2022-01-14 19:40:14, HIT
last_index 2022-03-09 04:30:31, HIT
last_index 2022-04-18 17:02:44, HIT
last_index 2022-05-24 04:02:26, HIT
last_index 2022-07-15 04:45:30, HIT
last_index 2022-08-28 05:17:53, HIT
last_index 2022-10-02 12:59:40, HIT
last_index 2022-11-24 11:58:42, HIT
last_index 2023-02-01 13:44:18, HIT
last_index 2023-03-26 20:19:47, HIT
last_index 2023-04-25 00:16:54, HIT
last_index 2023-06-09 05:17:18, HIT
last_index 2023-06-18 14:10:21, HIT
last_index 2023-06-23 12:11:01, HIT
last_index 2023-06-27 08:11:36, HIT
last_index 2023-07-02 10:13:06, HIT
last_index 2023-07-08 10:31:04, HIT
last_index 2023-07-16 01:44:41, HIT
last_index 2023-07-22 01:35:16, HIT
last_index 2023-07-27 22:02:20, HIT
last_index 2023-07-30 11:12:08, HIT
last_index 2023-08-01 01:54:23, HIT
last_index 2023-08-02 13:12:

In [58]:
df[BATCH_SIZE-2:BATCH_SIZE+2]

Unnamed: 0,id,name,twitter,proposalsCount,votesCount,followersCount,website,created
998,cosmicswap.eth,CosmicSwap,CosmicSwap,6,10,6,,1623002548
999,secretsky.eth,Secret Sky,secretskyfin,0,0,0,,1623012506
1000,cheesecakeswap.eth,CheesecakeSwap,cheesecakeswap,4,78,11,,1623016185
1001,samhans.eth,Sam H,samuelmhansen,2,0,1,,1623024130


In [59]:
df = df.drop_duplicates(['id', 'created'])

In [60]:
# how many have less than 10 proposals?
(df['proposalsCount'] < 10).value_counts()
# preview of events to come...

proposalsCount
True     39466
False     4545
Name: count, dtype: int64

In [61]:
# combine twitter and website into an 'additional' column, ignore NaNs
df['additional'] = df[['twitter', 'website']].apply(lambda x: ', '.join(x.dropna()), axis=1)
# drop twitter and website columns
df.drop(['twitter', 'website'], axis=1, inplace=True)
df

Unnamed: 0,id,name,proposalsCount,votesCount,followersCount,created,additional
0,bonustrack.eth,Hi,2,52,2,1605388716,
1,fabien.eth,Fabien,384,4702,966,1605976603,"bonustrack87, https://snapshot.org/#/fabien.eth"
2,ichi.eth,ichi.farm,60,762,55,1605995400,
3,loyalfinance.eth,loyal.finance,2,14,0,1606038518,
4,damflux.eth,FLUX,2,11,6,1606079425,
...,...,...,...,...,...,...,...
44008,islamchik2006.eth,Islam_crypto,1,1,1,1704053617,
44009,luksh.eth,Luksh,2,2,0,1704054770,https://youtube.com/@cryptoman5041?si=jpcLEQZw...
44010,dimonzamillionare.eth,Rich DAO,3,4,2,1704057684,
44011,rog-strix.eth,rog,0,0,1,1704059032,


In [62]:
# add platform = 'snapshot'
df['platform'] = 'snapshot'
# rename id to platform_id
df.rename(columns={'id': 'platform_id'}, inplace=True)
df

Unnamed: 0,platform_id,name,proposalsCount,votesCount,followersCount,created,additional,platform
0,bonustrack.eth,Hi,2,52,2,1605388716,,snapshot
1,fabien.eth,Fabien,384,4702,966,1605976603,"bonustrack87, https://snapshot.org/#/fabien.eth",snapshot
2,ichi.eth,ichi.farm,60,762,55,1605995400,,snapshot
3,loyalfinance.eth,loyal.finance,2,14,0,1606038518,,snapshot
4,damflux.eth,FLUX,2,11,6,1606079425,,snapshot
...,...,...,...,...,...,...,...,...
44008,islamchik2006.eth,Islam_crypto,1,1,1,1704053617,,snapshot
44009,luksh.eth,Luksh,2,2,0,1704054770,https://youtube.com/@cryptoman5041?si=jpcLEQZw...,snapshot
44010,dimonzamillionare.eth,Rich DAO,3,4,2,1704057684,,snapshot
44011,rog-strix.eth,rog,0,0,1,1704059032,,snapshot


In [63]:
# add website column https://snapshot.org/#/ + platform_id
df['website'] = 'https://snapshot.org/#/' + df['platform_id']
df

Unnamed: 0,platform_id,name,proposalsCount,votesCount,followersCount,created,additional,platform,website
0,bonustrack.eth,Hi,2,52,2,1605388716,,snapshot,https://snapshot.org/#/bonustrack.eth
1,fabien.eth,Fabien,384,4702,966,1605976603,"bonustrack87, https://snapshot.org/#/fabien.eth",snapshot,https://snapshot.org/#/fabien.eth
2,ichi.eth,ichi.farm,60,762,55,1605995400,,snapshot,https://snapshot.org/#/ichi.eth
3,loyalfinance.eth,loyal.finance,2,14,0,1606038518,,snapshot,https://snapshot.org/#/loyalfinance.eth
4,damflux.eth,FLUX,2,11,6,1606079425,,snapshot,https://snapshot.org/#/damflux.eth
...,...,...,...,...,...,...,...,...,...
44008,islamchik2006.eth,Islam_crypto,1,1,1,1704053617,,snapshot,https://snapshot.org/#/islamchik2006.eth
44009,luksh.eth,Luksh,2,2,0,1704054770,https://youtube.com/@cryptoman5041?si=jpcLEQZw...,snapshot,https://snapshot.org/#/luksh.eth
44010,dimonzamillionare.eth,Rich DAO,3,4,2,1704057684,,snapshot,https://snapshot.org/#/dimonzamillionare.eth
44011,rog-strix.eth,rog,0,0,1,1704059032,,snapshot,https://snapshot.org/#/rog-strix.eth


In [64]:
# rename votesCount to votes_count and proposalsCount to proposals_count
df.rename(columns={'votesCount': 'votes_count', 'proposalsCount': 'proposals_count'}, inplace=True)
# reorder columns
df = df[['platform', 'platform_id', 'name', 'additional', 'website', 'votes_count', 'proposals_count']]
df

Unnamed: 0,platform,platform_id,name,additional,website,votes_count,proposals_count
0,snapshot,bonustrack.eth,Hi,,https://snapshot.org/#/bonustrack.eth,52,2
1,snapshot,fabien.eth,Fabien,"bonustrack87, https://snapshot.org/#/fabien.eth",https://snapshot.org/#/fabien.eth,4702,384
2,snapshot,ichi.eth,ichi.farm,,https://snapshot.org/#/ichi.eth,762,60
3,snapshot,loyalfinance.eth,loyal.finance,,https://snapshot.org/#/loyalfinance.eth,14,2
4,snapshot,damflux.eth,FLUX,,https://snapshot.org/#/damflux.eth,11,2
...,...,...,...,...,...,...,...
44008,snapshot,islamchik2006.eth,Islam_crypto,,https://snapshot.org/#/islamchik2006.eth,1,1
44009,snapshot,luksh.eth,Luksh,https://youtube.com/@cryptoman5041?si=jpcLEQZw...,https://snapshot.org/#/luksh.eth,2,2
44010,snapshot,dimonzamillionare.eth,Rich DAO,,https://snapshot.org/#/dimonzamillionare.eth,4,3
44011,snapshot,rog-strix.eth,rog,,https://snapshot.org/#/rog-strix.eth,0,0


In [65]:
# save to csv
df.to_csv('snapshot_deployments.csv', index=False)