In [1]:
# Collect every proposal JSON export in proposals_out_2023_07_21; the files
# are read into one big dataframe further down.

import pandas as pd
import glob

# glob.glob returns matches in arbitrary, OS-dependent order; sort the list so
# the order in which the files are read (and therefore the row order of the
# combined frame) is reproducible from run to run.
file_names = sorted(glob.glob('proposals_out_2023_07_21/*.json'))
len(file_names)

137

In [2]:
# Monkeypatch the JSON decoder that pandas' private module pd.io.json._json
# exposes as `loads`, so that parsed JSON is normalised (nested objects are
# unnested into dotted columns) at the same time as it is decoded.
#
# Only one assignment is needed: earlier assignments to the same attribute
# would be overwritten before anything could use them.
#
# NOTE(review): this reaches into a private pandas module; confirm that the
# installed pandas version still looks up the name `loads` when reading JSON.

import json

try:
    # prefer the faster simplejson module when it is installed
    import simplejson
except ImportError:
    # fall back to the standard library so the notebook still runs without
    # the optional third-party package
    simplejson = json


def _loads_normalized(s, *a, **kw):
    """Decode the JSON text ``s`` and return it flattened by ``pd.json_normalize``.

    Any extra positional or keyword arguments are accepted and ignored.
    """
    return pd.json_normalize(simplejson.loads(s))


pd.io.json._json.loads = _loads_normalized

In [3]:
# Read every file and stack the records into one big dataframe.
# Each file is decoded and flattened explicitly (nested objects become dotted
# columns such as 'space.id') instead of depending on pd.read_json picking up
# a patched decoder, which relies on pandas internals.
def _read_proposals(path):
    """Load one JSON export and return its records as a flattened dataframe."""
    with open(path, encoding='utf-8') as fh:
        return pd.json_normalize(json.load(fh))


df = pd.concat([_read_proposals(f) for f in file_names], ignore_index=True)
df.shape

(125025, 5)

In [4]:
# preview the first rows of the combined frame
df.head()

Unnamed: 0,id,author,created,votes,space.id
0,0x6e3a5607e6cbe144fcea443f837a580c4fd7bfd19795...,0x167539702B5501aADd9B0B85E53532Fd57cC71a9,1689715693,6,paragonscouncil.eth
1,0x08808d7641cda26238b34dfcf2983fdb4f26b8fbd1a5...,0x806F0e3d1DD6e8499c2471d7e36f6cCA9723bd15,1689696851,1,tle-dao.eth
2,0x9a209c9e8147327d50b6b9bb33206c992aaa75d287d7...,0x86D95b07eB8BF038EFD9e1e3fAD8a389244F58e7,1689694511,1,tle-dao.eth
3,0x1d6eefccb4fb4aea18f6abcc649c5f1678ccfd5e403d...,0x4e14C2cFF2579925B686b5F2081F152802940214,1689692232,1,tle-dao.eth
4,0xce737a7023702de7a12badf7f075dff909cfe228e9ed...,0xF71ACA0452E9eB7d5f025Bf10e1711d7d3b90Aa6,1689691582,1,002688.eth


In [5]:
# inspect a single record: the row at integer position 40
df.iloc[40]

id          0x359abe8eca09aa2bb45a4a9a21b4f89ea64e619ff68e...
author             0x954b73548737af212Ec87CC07aA79A41668696AF
created                                            1689578365
votes                                                       1
space.id                                         mihail0z.eth
Name: 40, dtype: object

In [6]:
# list the column labels of the frame
df.columns

Index(['id', 'author', 'created', 'votes', 'space.id'], dtype='object')

In [7]:
# Reshape the frame into the target schema:
#   * platform               - constant 'snapshot' for every row
#   * platform_deployment_id - renamed from space.id
#   * proposal_id            - renamed from id
#   * author                 - no chg
#   * date                   - renamed from created
#   * votes_count            - renamed from votes
# A single rename mapping inside a non-mutating chain replaces the separate
# in-place renames; the result is rebound to `df` in one step.
column_renames = {
    'space.id': 'platform_deployment_id',
    'id': 'proposal_id',
    'created': 'date',
    'votes': 'votes_count',
}

# keep only these cols, in this order
kept_columns = ['platform', 'platform_deployment_id', 'proposal_id', 'author', 'date', 'votes_count']

df = (
    df
    .assign(platform='snapshot')
    .rename(columns=column_renames)
    .loc[:, kept_columns]
)
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count
0,snapshot,paragonscouncil.eth,0x6e3a5607e6cbe144fcea443f837a580c4fd7bfd19795...,0x167539702B5501aADd9B0B85E53532Fd57cC71a9,1689715693,6
1,snapshot,tle-dao.eth,0x08808d7641cda26238b34dfcf2983fdb4f26b8fbd1a5...,0x806F0e3d1DD6e8499c2471d7e36f6cCA9723bd15,1689696851,1
2,snapshot,tle-dao.eth,0x9a209c9e8147327d50b6b9bb33206c992aaa75d287d7...,0x86D95b07eB8BF038EFD9e1e3fAD8a389244F58e7,1689694511,1
3,snapshot,tle-dao.eth,0x1d6eefccb4fb4aea18f6abcc649c5f1678ccfd5e403d...,0x4e14C2cFF2579925B686b5F2081F152802940214,1689692232,1
4,snapshot,002688.eth,0xce737a7023702de7a12badf7f075dff909cfe228e9ed...,0xF71ACA0452E9eB7d5f025Bf10e1711d7d3b90Aa6,1689691582,1


In [8]:
# 'date' holds epoch timestamps expressed in seconds; turn them into pandas datetimes
epoch_seconds = df['date']
df['date'] = pd.to_datetime(epoch_seconds, unit='s')
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count
0,snapshot,paragonscouncil.eth,0x6e3a5607e6cbe144fcea443f837a580c4fd7bfd19795...,0x167539702B5501aADd9B0B85E53532Fd57cC71a9,2023-07-18 21:28:13,6
1,snapshot,tle-dao.eth,0x08808d7641cda26238b34dfcf2983fdb4f26b8fbd1a5...,0x806F0e3d1DD6e8499c2471d7e36f6cCA9723bd15,2023-07-18 16:14:11,1
2,snapshot,tle-dao.eth,0x9a209c9e8147327d50b6b9bb33206c992aaa75d287d7...,0x86D95b07eB8BF038EFD9e1e3fAD8a389244F58e7,2023-07-18 15:35:11,1
3,snapshot,tle-dao.eth,0x1d6eefccb4fb4aea18f6abcc649c5f1678ccfd5e403d...,0x4e14C2cFF2579925B686b5F2081F152802940214,2023-07-18 14:57:12,1
4,snapshot,002688.eth,0xce737a7023702de7a12badf7f075dff909cfe228e9ed...,0xF71ACA0452E9eB7d5f025Bf10e1711d7d3b90Aa6,2023-07-18 14:46:22,1


In [9]:
# write the proposals table to disk as CSV, leaving out the row index
df.to_csv(path_or_buf='snapshot_proposals.csv', index=False)