In [27]:
# Create the output directory for the downloaded proposal pages.
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation.
import os
os.makedirs('proposals_out_2023_07_21', exist_ok=True)

In [28]:
# read in ../../DATA/deployments/snapshot_deployments.csv
import pandas as pd
deployments_csv_path = '../../DATA/deployments/snapshot_deployments.csv'
df = pd.read_csv(deployments_csv_path)
# Distinct space identifiers; their count is shown as the cell output.
space_ids = df['platform_id'].unique()
len(space_ids)

26902

In [29]:
# Preview the first rows of the deployments table.
df.head()

Unnamed: 0,platform,platform_id,name,additional,website,votes_count,proposals_count
0,snapshot,ethfund.eth,ETHFUND,,https://snapshot.org/#/ethfund.eth,1,1
1,snapshot,fadedfate.eth,Magic Girls,liuhaha5308848,https://snapshot.org/#/fadedfate.eth,1,3
2,snapshot,fangnaoke.eth,study group,https://app.syndicate.io/collectives/0x5f51845...,https://snapshot.org/#/fangnaoke.eth,1,1
3,snapshot,fasola.eth,FASOLA.ETH,,https://snapshot.org/#/fasola.eth,7,1
4,snapshot,fdsfsfss.eth,alex,,https://snapshot.org/#/fdsfsfss.eth,3,3


In [30]:
# Summary statistics of the proposals_count column.
df.proposals_count.describe()

count    27471.000000
mean         4.371883
std         30.036288
min          0.000000
25%          1.000000
50%          1.000000
75%          2.000000
max       3177.000000
Name: proposals_count, dtype: float64

In [31]:
# Keep only the spaces that have at least one proposal.
df = df[df.proposals_count > 0]
# The recorded outputs show fewer unique platform_ids (26902) than rows (27471),
# i.e. some spaces are listed more than once. Sort largest-first and keep a
# single row per space (the one with the highest proposals_count) so that no
# space is batched, counted, or queried twice.
platforms_and_proposal_count = (
    df[['platform_id', 'proposals_count']]
    .sort_values(by='proposals_count', ascending=False)
    .drop_duplicates(subset='platform_id', keep='first')
)
# Convert to a list of (platform_id, proposals_count) tuples.
platforms_and_proposal_count = list(platforms_and_proposal_count.itertuples(index=False, name=None))
platforms_and_proposal_count[:5]

[('cakevote.eth', 3177),
 ('snapshot.dcl.eth', 2226),
 ('index-coop.eth', 1052),
 ('frami.eth', 657),
 ('huwacoin.eth', 631)]

In [32]:
# Pack the (platform_id, proposals_count) rows, in their current order, into
# consecutive groups whose proposals_count values sum to at most 5000.
def _group_by_proposal_budget(rows, budget=5000):
    """Split ``rows`` into consecutive groups whose counts sum to at most ``budget``.

    Args:
        rows: sequence of ``(platform_id, proposals_count)`` tuples. The input
            is only read, never modified, so the cell can be re-run safely.
        budget: maximum sum of ``proposals_count`` allowed in one group.

    Returns:
        A list of groups (lists of the original tuples) in input order. A row
        whose own count exceeds ``budget`` is placed alone in its own group
        instead of stalling the packing.
    """
    groups = []
    current, current_total = [], 0
    for row in rows:
        row_count = row[1]
        # Close the running group as soon as the next row would overflow it.
        # The `current` check guarantees progress for an oversized single row.
        if current and current_total + row_count > budget:
            groups.append(current)
            current, current_total = [], 0
        current.append(row)
        current_total += row_count
    if current:
        groups.append(current)
    return groups


groups_of_space_ids_that_sum_to_less_than_5000 = _group_by_proposal_budget(platforms_and_proposal_count)

groups_of_space_ids_that_sum_to_less_than_5000[:5]

[[('cakevote.eth', 3177)],
 [('snapshot.dcl.eth', 2226),
  ('index-coop.eth', 1052),
  ('frami.eth', 657),
  ('huwacoin.eth', 631)],
 [('balancer.eth', 602),
  ('bentfinance.eth', 542),
  ('bancornetwork.eth', 541),
  ('sdbal.eth', 535),
  ('lemu.dcl.eth', 476),
  ('sharkdao.eth', 471),
  ('aave.eth', 445),
  ('aavegotchi.eth', 433),
  ('aurafinance.eth', 415),
  ('fabien.eth', 367)],
 [('orbapp.eth', 365),
  ('frax.eth', 359),
  ('alluo.eth', 357),
  ('jbdao.eth', 354),
  ('goodmorningnews.eth', 352),
  ('aladdindao.eth', 331),
  ('magicappstore.eth', 319),
  ('fuse.eth', 317),
  ('goopsnapshot.eth', 315),
  ('leagueoflils.eth', 299),
  ('sdfxs.eth', 289),
  ('olympusdao.eth', 272),
  ('狂奔的蜗牛1.eth', 271),
  ('17707.eth', 262),
  ('primerating.eth', 261),
  ('veclev.eth', 260)],
 [('xdaistake.eth', 257),
  ('bgansv2.eth', 255),
  ('fantomecosystem.eth', 250),
  ('decentralgames.eth', 238),
  ('lido-snapshot.eth', 228),
  ('karastar.eth', 222),
  ('theheaddao.eth', 219),
  ('pnounsdao.e

In [33]:
# Report, for every group, how many spaces it holds and the proposals they add up to.
for group in groups_of_space_ids_that_sum_to_less_than_5000:
    proposal_total = sum(entry[1] for entry in group)
    print(len(group), proposal_total)

1 3177
4 4566
10 4827
16 4983
23 4918
32 4910
44 4967
57 4951
73 4943
96 4992
120 4967
150 4977
190 4984
236 4998
293 4987
358 4997
457 5000
584 4994
811 4997
1125 4998
1765 4999
2500 5000
4969 5000
5000 5000
2968 2968


In [34]:
# Number of groups produced by the packing step.
len(groups_of_space_ids_that_sum_to_less_than_5000)

25

In [35]:
# Total proposals_count of each group, in group order.
sums = []
for group in groups_of_space_ids_that_sum_to_less_than_5000:
    sums.append(sum(entry[1] for entry in group))
sums

[3177,
 4566,
 4827,
 4983,
 4918,
 4910,
 4967,
 4951,
 4943,
 4992,
 4967,
 4977,
 4984,
 4998,
 4987,
 4997,
 5000,
 4994,
 4997,
 4998,
 4999,
 5000,
 5000,
 5000,
 2968]

In [36]:
# Render each group as a comma-separated list of double-quoted space ids,
# ready to be substituted into the GraphQL `space_in: [...]` filter.
groups_of_space_ids_that_sum_to_less_than_5000_strings = [
    ", ".join('"' + str(entry[0]) + '"' for entry in group)
    for group in groups_of_space_ids_that_sum_to_less_than_5000
]

groups_of_space_ids_that_sum_to_less_than_5000_strings[0]

'"cakevote.eth"'

In [37]:
# TODO: Paginate with first + a `created` cursor (or another stable "index") instead of first/skip.
# Also use a caching library keyed on the query, so that if the space_ids / created / skip
# combination changes we don't reuse stale files and get incorrect results.
def make_query(space_ids: str, skip: int, first: int = 1000) -> str:
    """Build the GraphQL query for one page of closed proposals.

    Args:
        space_ids: pre-formatted, comma-separated list of double-quoted space
            ids; it is inserted verbatim inside ``space_in: [...]``.
        skip: number of proposals to skip (pagination offset).
        first: page size, i.e. the maximum number of proposals to return.
            Defaults to 1000, the value that was previously hard-coded.

    Returns:
        The query text, requesting proposals ordered by ``created`` descending.
    """
    base = """
        query Proposals {
         proposals(where: {space_in: [%s], state: "closed"}, first: %s, skip: %s, orderDirection: desc, orderBy: "created") {
            id
            space {
              id
            }
            author
            created
            votes
            title
            body
            start
            end
          }
        }
    """
    return base % (space_ids, first, skip)


In [38]:
import requests
import backoff
URL = "https://hub.snapshot.org/graphql"


# Retry with exponential backoff on any requests-level failure, including the
# HTTPError raised below for non-2xx responses. Retries are capped so that a
# persistently failing request surfaces as an error instead of looping forever.
@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_tries=8)
def query_snapshot(query):
    """POST a GraphQL query to the Snapshot hub and return the decoded JSON body.

    Args:
        query: GraphQL query text.

    Returns:
        The parsed JSON response.

    Raises:
        requests.exceptions.RequestException: if the request still fails
            (connection error, timeout, non-2xx status) after the retries.
    """
    # Timeout so a stalled connection cannot block the notebook indefinitely.
    r = requests.post(URL, json={'query': query}, timeout=60)
    # Turn HTTP error statuses (rate limiting, server errors) into exceptions
    # so that the backoff decorator actually retries them.
    r.raise_for_status()
    return r.json()

In [39]:
import json
from tqdm.autonotebook import tqdm

# Download the closed proposals of every group of spaces, one JSON file per page.
# Files are named proposals_<group index>_<skip>.json. On a re-run, pages that are
# followed by a later page (or by the first page of the next group) are treated as
# already downloaded; the last existing page of the last started group is fetched again.
for count, space_id_group in enumerate(tqdm(groups_of_space_ids_that_sum_to_less_than_5000_strings)):
    # make pagination logic
    skip = 0
    has_more = True
    while has_more:
        filename = f'proposals_out_2023_07_21/proposals_{count}_{skip}.json'
        next_proposals_filename = f'proposals_out_2023_07_21/proposals_{count}_{skip + 1000}.json'
        if os.path.exists(next_proposals_filename):
            print(f'next file exists: {next_proposals_filename}')
            skip += 1000
            continue
        next_group_filename = f'proposals_out_2023_07_21/proposals_{count + 1}_0.json'
        if os.path.exists(next_group_filename):
            print(f'next group file exists: {next_group_filename}')
            has_more = False
            continue
        query = make_query(space_id_group, skip)
        result = query_snapshot(query)
        # A GraphQL error response carries no usable "data"; fail with the
        # server's message instead of an opaque TypeError/KeyError.
        data = result.get('data') if isinstance(result, dict) else None
        if not data or data.get('proposals') is None:
            errors = result.get('errors') if isinstance(result, dict) else result
            raise RuntimeError(
                f'Snapshot query failed for group {count}, skip {skip}: {errors!r}'
            )
        proposals = data['proposals']
        print('got', len(proposals), 'proposals for', count, skip)
        # save proposals to file in the output dir; write to a temporary file
        # first and rename it, so an interrupted run never leaves a truncated
        # page that the resume checks above would treat as complete
        tmp_filename = filename + '.tmp'
        with open(tmp_filename, 'w') as f:
            json.dump(proposals, f)
        os.replace(tmp_filename, filename)
        # A full page means there may be more; a short page is the last one.
        if len(proposals) == 1000:
            skip += 1000
        else:
            has_more = False


  0%|          | 0/25 [00:00<?, ?it/s]

got 1000 proposals for 0 0
got 1000 proposals for 0 1000
got 1000 proposals for 0 2000
got 666 proposals for 0 3000
got 1000 proposals for 1 0
got 1000 proposals for 1 1000
got 1000 proposals for 1 2000
got 1000 proposals for 1 3000
got 825 proposals for 1 4000
got 1000 proposals for 2 0
got 1000 proposals for 2 1000
got 1000 proposals for 2 2000
got 1000 proposals for 2 3000
got 1000 proposals for 2 4000
got 449 proposals for 2 5000
got 1000 proposals for 3 0
got 1000 proposals for 3 1000
got 1000 proposals for 3 2000
got 1000 proposals for 3 3000
got 1000 proposals for 3 4000
got 652 proposals for 3 5000
got 1000 proposals for 4 0
got 1000 proposals for 4 1000
got 1000 proposals for 4 2000
got 1000 proposals for 4 3000
got 1000 proposals for 4 4000
got 218 proposals for 4 5000
got 1000 proposals for 5 0
got 1000 proposals for 5 1000
got 1000 proposals for 5 2000
got 1000 proposals for 5 3000
got 1000 proposals for 5 4000
got 97 proposals for 5 5000
got 1000 proposals for 6 0
got 1000