In [50]:
# Collect the path of every JSON export in proposals_out_2023_07_20.
# The files themselves are read into a single DataFrame in a later cell.

import pandas as pd
import glob

# Sort the matches: glob.glob returns paths in arbitrary, filesystem-dependent
# order, and that order decides the row order of the concatenated frame.
file_names = sorted(glob.glob('proposals_out_2023_07_20/*.json'))
len(file_names)

139

In [51]:
# Make pd.read_json flatten nested JSON while it parses.
#
# The `loads` name inside the private pandas.io.json._json module is replaced
# with a function that parses the text and passes the result through
# pd.json_normalize, so nested objects come out as dotted columns.
# NOTE(review): this relies on a pandas-internal name; confirm the installed
# pandas version still looks up `loads` there, otherwise the patch is a no-op.

import json

# Use simplejson when it is installed, but do not require it: the standard
# library parser is used as a fallback so the cell still runs without it.
try:
    import simplejson
    _parse_json = simplejson.loads
except ImportError:
    _parse_json = json.loads

# Only one patch is installed (earlier intermediate assignments were
# overwritten anyway). Extra positional/keyword arguments passed to `loads`
# are accepted and ignored.
pd.io.json._json.loads = lambda s, *a, **kw: pd.json_normalize(_parse_json(s))

In [52]:
# Read every proposals file and stack them into one DataFrame.
#
# Each file is parsed with the standard json module and flattened with the
# public pd.json_normalize API, so nested objects become dotted columns
# (e.g. 'space.id') without depending on pd.read_json's internal `loads`
# name having been monkeypatched.
import json


def _read_proposals(path):
    """Return the flattened records stored in one JSON file as a DataFrame."""
    with open(path, encoding='utf-8') as handle:
        return pd.json_normalize(json.load(handle))


df = pd.concat([_read_proposals(f) for f in file_names], ignore_index=True)
df.shape

(124634, 5)

In [53]:
# Preview the first rows of the combined frame.
df.head()

Unnamed: 0,id,author,created,votes,space.id
0,0xa3361c15ebb91e731146b652da16242384bd7a5679d8...,0x0242834979411FfCE95cDA74a5b00d0107a92d75,1689871931,1,798777.eth
1,0x753a316234628e0e97bbc04c7513deec517fa99671fd...,0xCDCbFFa97EDaF82Abd6C02D2aB12D59D7A700a01,1689870878,1,798777.eth
2,0x0e2ba26f4512e9ccc90491a1b9f64f395d6834a5eb2d...,0xF509B73e606A54281147711a51Cf8572BBE9987b,1689869558,1,798777.eth
3,0x08fbbc7bbcfb0b972dcd6d5b4f60ee9a5be4470f0f43...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,1689865290,1,798777.eth
4,0x61c7c68e5bd50cbb1ee45f3804eef83f0bc1be0d428f...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,1689865267,1,798777.eth


In [54]:
# Look at the single record at integer position 40.
df.iloc[40]

id          0x9a528ab6196315854f95bffc8edc305cd20781c692e3...
author             0x677Ef61298372e2B56757Cae32F01B350B6013Bc
created                                            1689577850
votes                                                       1
space.id                               computers.freesubs.eth
Name: 40, dtype: object

In [55]:
# List the column labels of the combined frame.
df.columns

Index(['id', 'author', 'created', 'votes', 'space.id'], dtype='object')

In [56]:
# Reshape the raw frame into the export schema:
#   platform               <- constant 'snapshot'
#   platform_deployment_id <- space.id
#   proposal_id            <- id
#   author                 <- author (unchanged)
#   date                   <- created
#   votes_count            <- votes
df = (
    df.assign(platform='snapshot')
    .rename(columns={
        'space.id': 'platform_deployment_id',
        'id': 'proposal_id',
        'created': 'date',
        'votes': 'votes_count',
    })
)

# Keep only the export columns, in their final order.
df = df[['platform', 'platform_deployment_id', 'proposal_id', 'author', 'date', 'votes_count']]
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count
0,snapshot,798777.eth,0xa3361c15ebb91e731146b652da16242384bd7a5679d8...,0x0242834979411FfCE95cDA74a5b00d0107a92d75,1689871931,1
1,snapshot,798777.eth,0x753a316234628e0e97bbc04c7513deec517fa99671fd...,0xCDCbFFa97EDaF82Abd6C02D2aB12D59D7A700a01,1689870878,1
2,snapshot,798777.eth,0x0e2ba26f4512e9ccc90491a1b9f64f395d6834a5eb2d...,0xF509B73e606A54281147711a51Cf8572BBE9987b,1689869558,1
3,snapshot,798777.eth,0x08fbbc7bbcfb0b972dcd6d5b4f60ee9a5be4470f0f43...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,1689865290,1
4,snapshot,798777.eth,0x61c7c68e5bd50cbb1ee45f3804eef83f0bc1be0d428f...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,1689865267,1


In [57]:
# 'date' holds epoch timestamps in seconds; turn it into pandas datetimes.
df = df.assign(date=lambda frame: pd.to_datetime(frame['date'], unit='s'))
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count
0,snapshot,798777.eth,0xa3361c15ebb91e731146b652da16242384bd7a5679d8...,0x0242834979411FfCE95cDA74a5b00d0107a92d75,2023-07-20 16:52:11,1
1,snapshot,798777.eth,0x753a316234628e0e97bbc04c7513deec517fa99671fd...,0xCDCbFFa97EDaF82Abd6C02D2aB12D59D7A700a01,2023-07-20 16:34:38,1
2,snapshot,798777.eth,0x0e2ba26f4512e9ccc90491a1b9f64f395d6834a5eb2d...,0xF509B73e606A54281147711a51Cf8572BBE9987b,2023-07-20 16:12:38,1
3,snapshot,798777.eth,0x08fbbc7bbcfb0b972dcd6d5b4f60ee9a5be4470f0f43...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,2023-07-20 15:01:30,1
4,snapshot,798777.eth,0x61c7c68e5bd50cbb1ee45f3804eef83f0bc1be0d428f...,0x483a15A333eDC0A2C2D9Bbd986e6692A0FFA8471,2023-07-20 15:01:07,1


In [58]:
# Write the frame to snapshot_proposals.csv, leaving out the row index.
df.to_csv('snapshot_proposals.csv', index=False)