In [1]:
# get all of the json files in proposals_out_2023_07_21
# read them all as one big dataframe

import pandas as pd
import glob

# Collect every JSON file in proposals_out_2023_07_21.
# glob returns paths in a filesystem-dependent order, so sort them to make the
# row order of the combined dataframe reproducible across machines and runs.
file_names = sorted(glob.glob('proposals_out_2023_07_21/*.json'))
len(file_names)

122

In [2]:
# Monkeypatch the JSON parser attribute of pandas' JSON reader module so that
# each document is parsed AND normalised (nested objects are unnested into
# dotted column names) in one step.
#
# NOTE(review): pd.io.json._json is a private pandas module; whether
# pd.read_json consults this attribute depends on the installed pandas
# version -- confirm after any pandas upgrade.

import json

try:
    # Optional third-party parser (described as faster in the original notebook).
    import simplejson
    _json_loads = simplejson.loads
except ImportError:
    # Fall back to the standard library so the notebook still runs without it.
    _json_loads = json.loads


def _loads_normalized(s, *args, **kwargs):
    """Parse the JSON text ``s`` and return it flattened as a DataFrame.

    Any extra positional or keyword arguments supplied by the caller are
    accepted and ignored.
    """
    return pd.json_normalize(_json_loads(s))


pd.io.json._json.loads = _loads_normalized

In [3]:
# Read every JSON file and stack the results into one big dataframe.
# pd.concat on an empty list fails with a generic "No objects to concatenate"
# error, so report an empty file list explicitly (same exception type).
if not file_names:
    raise ValueError('file_names is empty: no JSON files to read')

df = pd.concat([pd.read_json(path) for path in file_names], ignore_index=True)
df.shape

(116544, 9)

In [4]:
# Preview the first rows of the combined dataframe.
df.head()

Unnamed: 0,id,author,created,votes,title,body,start,end,space.id
0,0xd8c75dbcb32a6c82e049b482ecaa9706a78c2fc91d1a...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,1638196253,3,Is SocialFi or GameFi next hustle?,,1638158400,1656561600,modeplc.eth
1,0xf4fbef4f0a128f233b6da3b744d10d3b7a3c74d4b048...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,1638195789,3,Is SocialFi or GameFi next hustle?,,1638158400,1656561600,modeplc.eth
2,0xdd9a17031b8e109c04cef929129692e1632e8cfc40a6...,0x9349738c1491d4DDDfddDc3A4Cb0E3e709934C63,1638194719,5,Will Elon be the leadship in Crypto?,,1638158400,1656561600,brummer.eth
3,0xc8c4f6054e9f9e1255768752ee07aaa9bdca15c95749...,0xB2b8DE16675ef164e51910BDf15e62F2f80f7f4c,1638193655,14,[BRIGHT-1] Chain for Bright Union NFT collection,As part of the BRIGHT loyalty program NFTs fro...,1638172800,1638529200,brightunion.eth
4,0xb7ed8c45ce8f8c23e52acc057ddc3d987d012532df7f...,0xAB45d5A30034e7959968cB7362f494bb95c19aB7,1638179838,24,DAO Leadership Nominations,"This vote is open to DYSTOMICE, SPACEMICE and ...",1638216000,1638388800,dystomice.eth


In [5]:
# Inspect one full record (the row at position 40) as a column -> value Series.
df.iloc[40]

id          0x6ea46224e78ace67342fab4be1ab3853381878bcebee...
author             0x65a5aBB63bb431a9B8377487d0A8cd2a797856f0
created                                            1637860350
votes                                                       1
title             How many times will GhostBlockNFT increase?
body              How many times will GhostBlockNFT increase?
start                                              1637856000
end                                                1638244800
space.id                                           xiaohe.eth
Name: 40, dtype: object

In [6]:
# List the column labels of the combined dataframe.
df.columns

Index(['id', 'author', 'created', 'votes', 'title', 'body', 'start', 'end',
       'space.id'],
      dtype='object')

In [7]:
# Map the raw columns onto the target column names in a single chain:
#   * space.id -> platform_deployment_id
#   * id       -> proposal_id
#   * created  -> date
#   * votes    -> votes_count
#   * body     -> description
#   * author, title, start, end are kept unchanged
# A constant 'platform' column is added, and only the listed columns are kept,
# in the listed order.
df = (
    df
    .rename(columns={
        'space.id': 'platform_deployment_id',
        'id': 'proposal_id',
        'created': 'date',
        'votes': 'votes_count',
        'body': 'description',
    })
    .assign(platform='snapshot')
    [[
        'platform',
        'platform_deployment_id',
        'proposal_id',
        'author',
        'date',
        'votes_count',
        'title',
        'description',
        'start',
        'end',
    ]]
)
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count,title,description,start,end
0,snapshot,modeplc.eth,0xd8c75dbcb32a6c82e049b482ecaa9706a78c2fc91d1a...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,1638196253,3,Is SocialFi or GameFi next hustle?,,1638158400,1656561600
1,snapshot,modeplc.eth,0xf4fbef4f0a128f233b6da3b744d10d3b7a3c74d4b048...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,1638195789,3,Is SocialFi or GameFi next hustle?,,1638158400,1656561600
2,snapshot,brummer.eth,0xdd9a17031b8e109c04cef929129692e1632e8cfc40a6...,0x9349738c1491d4DDDfddDc3A4Cb0E3e709934C63,1638194719,5,Will Elon be the leadship in Crypto?,,1638158400,1656561600
3,snapshot,brightunion.eth,0xc8c4f6054e9f9e1255768752ee07aaa9bdca15c95749...,0xB2b8DE16675ef164e51910BDf15e62F2f80f7f4c,1638193655,14,[BRIGHT-1] Chain for Bright Union NFT collection,As part of the BRIGHT loyalty program NFTs fro...,1638172800,1638529200
4,snapshot,dystomice.eth,0xb7ed8c45ce8f8c23e52acc057ddc3d987d012532df7f...,0xAB45d5A30034e7959968cB7362f494bb95c19aB7,1638179838,24,DAO Leadership Nominations,"This vote is open to DYSTOMICE, SPACEMICE and ...",1638216000,1638388800


In [8]:
# 'date', 'start' and 'end' hold epoch timestamps in seconds; convert each of
# them to pandas datetimes.
for column in ('date', 'start', 'end'):
    df[column] = pd.to_datetime(df[column], unit='s')
df.head()

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count,title,description,start,end
0,snapshot,modeplc.eth,0xd8c75dbcb32a6c82e049b482ecaa9706a78c2fc91d1a...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,2021-11-29 14:30:53,3,Is SocialFi or GameFi next hustle?,,2021-11-29 04:00:00,2022-06-30 04:00:00
1,snapshot,modeplc.eth,0xf4fbef4f0a128f233b6da3b744d10d3b7a3c74d4b048...,0x2b49F45B80d7AD5C3Af9C96B5B664A953979d16A,2021-11-29 14:23:09,3,Is SocialFi or GameFi next hustle?,,2021-11-29 04:00:00,2022-06-30 04:00:00
2,snapshot,brummer.eth,0xdd9a17031b8e109c04cef929129692e1632e8cfc40a6...,0x9349738c1491d4DDDfddDc3A4Cb0E3e709934C63,2021-11-29 14:05:19,5,Will Elon be the leadship in Crypto?,,2021-11-29 04:00:00,2022-06-30 04:00:00
3,snapshot,brightunion.eth,0xc8c4f6054e9f9e1255768752ee07aaa9bdca15c95749...,0xB2b8DE16675ef164e51910BDf15e62F2f80f7f4c,2021-11-29 13:47:35,14,[BRIGHT-1] Chain for Bright Union NFT collection,As part of the BRIGHT loyalty program NFTs fro...,2021-11-29 08:00:00,2021-12-03 11:00:00
4,snapshot,dystomice.eth,0xb7ed8c45ce8f8c23e52acc057ddc3d987d012532df7f...,0xAB45d5A30034e7959968cB7362f494bb95c19aB7,2021-11-29 09:57:18,24,DAO Leadership Nominations,"This vote is open to DYSTOMICE, SPACEMICE and ...",2021-11-29 20:00:00,2021-12-01 20:00:00


In [9]:
# Summarise column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116544 entries, 0 to 116543
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   platform                116544 non-null  object        
 1   platform_deployment_id  116544 non-null  object        
 2   proposal_id             116544 non-null  object        
 3   author                  116544 non-null  object        
 4   date                    116544 non-null  datetime64[ns]
 5   votes_count             116544 non-null  int64         
 6   title                   116544 non-null  object        
 7   description             116544 non-null  object        
 8   start                   116544 non-null  datetime64[ns]
 9   end                     116544 non-null  datetime64[ns]
dtypes: datetime64[ns](3), int64(1), object(6)
memory usage: 8.9+ MB


In [10]:
# Show any rows whose votes_count is missing.
df.loc[lambda frame: frame['votes_count'].isna()]

Unnamed: 0,platform,platform_deployment_id,proposal_id,author,date,votes_count,title,description,start,end


In [11]:
# Save the cleaned proposals to disk without writing the index.
# Parquet is a typed format, so column dtypes are stored with the data.
# CSV alternative, currently disabled:
# df.to_csv('snapshot_proposals.csv', index=False)
df.to_parquet('snapshot_proposals.parquet', index=False)

In [12]:
# Read the saved parquet file back and verify the round trip: the reloaded
# frame must have the same shape and the same per-column dtypes as df.
aux = pd.read_parquet('snapshot_proposals.parquet')
assert aux.shape == df.shape, 'shape changed in parquet round trip'
assert aux.dtypes.equals(df.dtypes), 'column dtypes changed in parquet round trip'
aux.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116544 entries, 0 to 116543
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   platform                116544 non-null  object        
 1   platform_deployment_id  116544 non-null  object        
 2   proposal_id             116544 non-null  object        
 3   author                  116544 non-null  object        
 4   date                    116544 non-null  datetime64[ns]
 5   votes_count             116544 non-null  int64         
 6   title                   116544 non-null  object        
 7   description             116544 non-null  object        
 8   start                   116544 non-null  datetime64[ns]
 9   end                     116544 non-null  datetime64[ns]
dtypes: datetime64[ns](3), int64(1), object(6)
memory usage: 8.9+ MB
