This notebook exports the votes and proposals tables of a given DAO, taken from the DAO Census dataset, as Parquet files

In [96]:
from typing import Optional
from pathlib import Path
import datetime as dt

import numpy as np
import pandas as pd

## Hparams (DAO name)

In [119]:
# Name of the organization to export. It is compared against the merged
# `name` column of the `deployments` view in the votes and proposals queries.
ORG_NAME: str = 'Decentraland'
# Platform(s) to keep. The query cell accepts a single string or a list of
# strings; a falsy value (None, '', []) disables the platform filter.
FILTER_PLATFORMS: str = 'snapshot'
# Kaggle dataset identifier, downloaded into AUX_PATH when that path is missing.
KAGGLE_DATASET: str = 'daviddavo/daos-census-tfm'
# Directory where this organization's exported tables are written.
EXPORT_PATH: Path = Path('../data/') / ORG_NAME
# Local copy of the dataset; the parquet files are read from here.
AUX_PATH: Path = Path('~/Downloads/daos-census-tfm').expanduser()
# Optional cutoff, parsed with datetime.fromisoformat. When set, only votes
# and proposals whose `date` is <= the cutoff are kept. None disables it.
CUTOFF_DATE_STR: Optional[str] = None

# This dictionary "merges" organizations: each key is the merged name that
# replaces any of the deployment names listed in its value.
ORGS_DICT: dict[str, list[str]] = {
    'dxDAO - xDXdao': ['dxDAO', 'xDXdao'],
    'Aave - Aavegotchi': ['Aave', 'Aavegotchi', 'AAVE'],
    'MetaCartel - MetaCartel Ventures': ['MetaCartel Ventures', 'MetaCartel xDai', 'MetaCartel DAO'],
}

In [57]:
# Parsed cutoff timestamp; stays None when no cutoff string was configured.
CUTOFF_DATE = None
if CUTOFF_DATE_STR:
    CUTOFF_DATE = dt.datetime.fromisoformat(CUTOFF_DATE_STR)

## Downloading the dataset if it does not exist

In [58]:
import kaggle

In [59]:
# Fetch and unzip the Kaggle dataset only when the local copy is missing.
_dataset_is_cached = AUX_PATH.exists()
if not _dataset_is_cached:
    kaggle.api.dataset_download_cli(KAGGLE_DATASET, path=AUX_PATH, unzip=True)

## Processing the dataset

In [60]:
import duckdb

In [61]:
def _list2sql(lst: list[str]) -> str:
    return "".join(["(", ", ".join(map("'{}'".format, lst)), ")"])

def _gen_orgs_query(parquet: Path) -> str:
    """Build the SQL statement that creates the ``deployments`` view.

    The view exposes every column of the parquet file, keeps the original
    name as ``deployment_name`` and adds a ``name`` column in which the
    deployment names listed in ``ORGS_DICT`` are replaced by their merged
    organization name. Names not listed in ``ORGS_DICT`` are kept as they are.

    Args:
        parquet: Path of the deployments parquet file to scan.

    Returns:
        The ``CREATE VIEW deployments AS ...`` statement as a string.
    """
    def _escape(value) -> str:
        # Double single quotes so the value cannot close the SQL literal.
        return str(value).replace("'", "''")

    when_clauses = [
        f"        WHEN name IN {_list2sql(aliases)} THEN '{_escape(merged_name)}'"
        for merged_name, aliases in ORGS_DICT.items()
    ]

    if when_clauses:
        name_expr = "\n".join([
            "    CASE",
            *when_clauses,
            "        ELSE name",
            "    END AS name",
        ])
    else:
        # A CASE expression needs at least one WHEN branch; with nothing to
        # merge, the original name is used unchanged.
        name_expr = "    name AS name"

    return "\n".join([
        "",
        "CREATE VIEW deployments AS",
        "SELECT * EXCLUDE (name),",
        "    name AS deployment_name,",
        name_expr,
        f"FROM parquet_scan('{_escape(parquet)}')",
        "",
    ])

### Import from parquets

In [133]:
# In-memory DuckDB database; the parquet files are only exposed as views.
db = duckdb.connect(database=':memory:', read_only=False)
db.execute(_gen_orgs_query(AUX_PATH / 'deployments.parquet'))

# Optional date filter shared by the votes and proposals views.
_cond_date = f"WHERE date <= '{CUTOFF_DATE.isoformat()}'" if CUTOFF_DATE else ""

_votes_parquet = AUX_PATH / "votes.parquet"
_proposals_parquet = AUX_PATH / "proposals-text.parquet"
db.execute(f"CREATE VIEW votes AS SELECT * FROM parquet_scan('{_votes_parquet}') {_cond_date}")
db.execute(f"CREATE VIEW proposals AS SELECT * FROM parquet_scan('{_proposals_parquet}') {_cond_date}")

<duckdb.duckdb.DuckDBPyConnection at 0x7fe5cd838170>

### Get votes table

In [134]:
# WHERE conditions for the selected organization; reused by the proposals query.
cond_dfv = [f"name='{ORG_NAME}'"]

if FILTER_PLATFORMS:
    # Accept either a single platform name or a list of them.
    filter_platforms = [FILTER_PLATFORMS] if isinstance(FILTER_PLATFORMS, str) else FILTER_PLATFORMS
    cond_dfv.append(f"platform IN {_list2sql(filter_platforms)}")

_where_dfv = " AND ".join(cond_dfv)
q = f"""
SELECT platform, name, votes.*
FROM deployments
RIGHT JOIN votes ON (deployments.id = votes.deployment_id)
WHERE {_where_dfv}
"""

dfv = (
    db.execute(q)
    .fetchdf()
    # Drop the '_id' part of column names (e.g. proposal_id -> proposal).
    .rename(columns=lambda col: col.replace('_id', ''))
    # Lower-case the voter values.
    .assign(voter=lambda frame: frame['voter'].str.lower())
)
dfv

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,platform,name,id,proposal,deployment,platform_vote,voter,date,choice,weight
0,snapshot,Decentraland,2d6a4c7e-3ed8-593d-87cc-c5adb2b3c35a,17e6c658-5933-56f7-9646-f5581ec5e5db,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmVb6RN7Na6KY2j6YCSNZjppk1upyxMz9rh3qkvHSMUw48,0xb0145ae156d201d6e371d07265fe3c045071c967,2022-05-23 15:51:13,"[0, 0, 0, 19896.78, 100, 426000]",445996.0
1,snapshot,Decentraland,83dc22c0-fe15-5abf-8ff9-11cd2e85911f,4776398b-ce68-5820-ab0e-fdcdef922fab,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0xd3d7de797b6e3bb76a97e8fc5cf999c5ac481f16368f...,0x0b8e5bdbb5b8d83af32d3984ba9bfae635edf156,2022-05-23 03:59:04,"[0, 0, 0, 7.086853435702049, 0, 0]",7.0
2,snapshot,Decentraland,cca3087e-75cc-530d-a161-dfdcf8f7f264,f7047b92-6baf-5a8f-91d4-5b7907805662,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x8b8e0a2505802c8ee19b179fb0994bec883d09c5ba9c...,0x0b8e5bdbb5b8d83af32d3984ba9bfae635edf156,2022-05-23 03:58:40,"[0, 0, 0, 7.086853435702049, 0, 0]",7.0
3,snapshot,Decentraland,24d0ef37-f458-5a74-afe2-36efc8b590c3,17e6c658-5933-56f7-9646-f5581ec5e5db,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x35360881f4e3fbef9ebb3feea83c8d009e794042e1ea...,0x0b8e5bdbb5b8d83af32d3984ba9bfae635edf156,2022-05-23 03:54:05,"[0, 0, 0, 7.086853435702049, 0, 0]",7.0
4,snapshot,Decentraland,f4618e98-a7fa-5466-a4ed-e373c98ebfcc,f7047b92-6baf-5a8f-91d4-5b7907805662,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x56224fcc7c0e061c7d182d1183f1bb3cde2b7beff8e7...,0x62647118b8b13dd459232ae46c8feb157a054d1f,2022-05-23 02:30:05,"[0, 88000, 32000, 0, 200, 0]",120200.0
...,...,...,...,...,...,...,...,...,...,...
116555,snapshot,Decentraland,df28a015-2ecb-52e2-a005-2751c30aed56,712af6bc-7218-5caf-9be3-06712907ca65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmPnZFfM3pXs8ucDxgHXKcCauxXA7LDz6wmt9yRNAGN5AU,0x7820fd2ca0ccaba83b3639a2d5343573b7989863,2021-11-10 03:28:41,"[0, 2000, 0, 13137.170593899164, 100, 0]",15237.0
116556,snapshot,Decentraland,894f9d91-be18-5cf2-99f3-058a57ab3d69,712af6bc-7218-5caf-9be3-06712907ca65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmSitqftbm4oCCyGd3h3qXX1UYsNx3aWuUWoKLr7ek7VRE,0x3609cd1df99274f09d60cdf669e5bf8073fbd94f,2021-11-09 03:16:28,"[0, 0, 8000, 1250.15815784, 100, 0]",9350.0
116557,snapshot,Decentraland,1d950d18-15b5-5467-a952-01572709d45a,712af6bc-7218-5caf-9be3-06712907ca65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmcK6a7fraeUoNZf2n2KEgBSfh3iJS72NBxpBoTBXKrQup,0xbbeec2733f441b144726f819d4d8dee82f65b4f2,2021-11-08 10:42:36,"[0, 2000, 0, 1869.82857425, 0, 0]",3869.0
116558,snapshot,Decentraland,ba1aee0d-8d6a-5375-b961-9b9cbb319b18,712af6bc-7218-5caf-9be3-06712907ca65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmZGUXrD2QmUWfahSSJYHT2x2KAgXG493PX51aJnZFWeQd,0xc54a6c3778016b06cbd126ccc3b5bc06c5f666fb,2021-11-07 15:13:30,"[0, 0, 0, 145, 0, 0]",145.0


#### Clean and save votes table

We will keep only the following columns:
- id
- proposal
- voter
- date

In [142]:
# Columns kept in the exported votes table.
dfv_cols = ['id', 'proposal', 'voter', 'date']
votes_file = EXPORT_PATH / 'votes.pq'

EXPORT_PATH.mkdir(parents=True, exist_ok=True)
# Refuse to clobber an earlier export.
assert not votes_file.exists(), "Cant overwrite files"

_votes_sorted = dfv[dfv_cols].sort_values('date')
_votes_sorted.to_parquet(votes_file, index=False)

### Get proposals table

In [143]:
# Same organization/platform conditions as the votes query (cond_dfv).
# votes_count is recomputed by counting the joined rows of the votes view.
_where_dfp = " AND ".join(cond_dfv)
q = f"""
SELECT platform, name, platform_deployment_id, proposals.* EXCLUDE (votes_count), count(votes.id) AS votes_count
FROM deployments
RIGHT JOIN proposals ON (deployments.id = proposals.deployment_id)
LEFT JOIN votes ON (proposals.id = votes.proposal_id)
WHERE {_where_dfp}
GROUP BY proposals.*
-- HAVING count(votes.id) >= {0}
"""

dfp = (
    db.execute(q)
    .fetchdf()
    # Drop the '_id' part of column names (e.g. platform_proposal_id -> platform_proposal).
    .rename(columns=lambda col: col.replace('_id', ''))
    # Lower-case the author values.
    .assign(author=lambda frame: frame['author'].str.lower())
)
dfp

Unnamed: 0,platform,name,platform_deployment,id,deployment,platform_proposal,author,date,title,description,start,end,votes_count
0,snapshot,Decentraland,snapshot.dcl.eth,25867804-0cff-5917-92cb-77c58fda7d5e,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0xbddb5c2fcd913d4129a16013a9658d9546799cb005e9...,0xbb7b59afa3a0e5be143b8fe9c641f00c1ecb9d69,2023-02-02 12:20:25,Should the DAO implement a mechanism to delega...,> by 0x1e93e534c5e26b01ed242410b43ae23dd0faa52...,2023-02-02 12:20:00,2023-02-09 12:20:00,311
1,snapshot,Decentraland,snapshot.dcl.eth,cd32c157-d64b-55f7-86b5-34a6153d0188,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmWqMCLrDSk8sZw3DW7MCJLoxVfRCsimBsVG5WQMPhthJW,0x5e23d08324f017d5425e59a2782c9ae27ace0958,2022-02-11 04:05:29,"Add the location 141,-36 to the Points of Inte...",> by 0xcdbbba582ef4b5d9661b3837bdfab21ab856fc3...,2022-02-11 04:05:00,2022-02-18 04:05:00,16
2,snapshot,Decentraland,snapshot.dcl.eth,f5f89164-44bc-505e-b79e-81607ddfec5f,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmP8q8dnJ7nUVFoce2i25MW1Paj3vuAH4T3ismKmd9FAdK,0x5e23d08324f017d5425e59a2782c9ae27ace0958,2022-02-06 15:53:23,"Chill, travel & Hobbies in the 4 corners of th...",> by 0xc5cf3073bb48064004fb9456c8745d970937d49...,2022-02-06 15:53:00,2022-02-20 15:53:00,15
3,snapshot,Decentraland,snapshot.dcl.eth,c375a995-3268-5297-a505-6707aa3b3cd0,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmcpmMzxSUYv9fG5tSJR7ZmhXGtARio8p1tnJ57mtQ4Fvx,0x5e23d08324f017d5425e59a2782c9ae27ace0958,2022-02-08 00:04:22,Ethereum (L1) Wearables: A Comprehensive Propo...,> by 0xdab85506424396fb090d84b64ad63b770d87c02...,2022-02-08 00:04:00,2022-02-13 00:04:00,15
4,snapshot,Decentraland,snapshot.dcl.eth,12044fb6-6d66-56da-9f3e-6cef9472cc1f,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmVEaMM3ptB6717pSQ4LnCGyqjWM84Ar96hyZCT3Ebn1wt,0x5e23d08324f017d5425e59a2782c9ae27ace0958,2022-01-31 01:48:28,"Add the location -42,109 to the Points of Inte...",> by 0x6965e11b5b9bee8cfc62109a6ce38b9f41c3e0c...,2022-01-31 01:48:00,2022-02-07 01:48:00,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2208,snapshot,Decentraland,snapshot.dcl.eth,c954d142-4ed4-5850-a80b-d3c7959299b0,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x00ab2254d9a04fb344731bc6f20e7de9678751fa68e5...,0xbb7b59afa3a0e5be143b8fe9c641f00c1ecb9d69,2023-09-05 16:41:59,Add Grant Revocation as a new Proposal Categor...,> by 0xd4f1cab694c4424c4796549edbb9b489789f4df...,2023-09-05 16:41:00,2023-09-10 16:41:00,0
2209,snapshot,Decentraland,snapshot.dcl.eth,f6dd1901-2d4c-5391-be88-c34f266fed72,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0xe286e5292dfa07f3d485c36555a0c2d90ee2d1ce5bd8...,0xbb7b59afa3a0e5be143b8fe9c641f00c1ecb9d69,2023-07-31 20:10:41,Decentraland University Live Teaching Platform,> by 0xe5cf1bb88a59f9fc609689c681d1d14bfe7ce73...,2023-07-31 20:10:00,2023-08-14 20:10:00,0
2210,snapshot,Decentraland,snapshot.dcl.eth,15265523-be99-5494-99f1-7c85d2f80cd4,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x6ea63de35cadf2459a4b0715f5b2003b17e4a1ecfd63...,0xbb7b59afa3a0e5be143b8fe9c641f00c1ecb9d69,2023-09-09 16:57:21,Integrate UI/UX Parameters to Provide Grantee ...,> by 0x2684a202a374d87bb321a744482b89bf6deaf8b...,2023-09-09 16:57:00,2023-09-14 16:57:00,0
2211,snapshot,Decentraland,snapshot.dcl.eth,84a950c1-8bd3-5aac-9861-d4ed99178bfc,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x36caf07aa45b040ca964c9c3dff435a12e6dd98c23ab...,0xbb7b59afa3a0e5be143b8fe9c641f00c1ecb9d69,2023-07-31 20:27:46,Mobile Backpack Experience,> by 0x511a22cdd2c4ee8357bb02df2578037ffe8a4d8...,2023-07-31 20:27:00,2023-08-10 20:27:00,0


#### Clean and save proposals table

We will keep only the following columns:
- id
- author
- date
- start
- end
- platform_proposal
- title
- description

In [144]:
# NOTE(review): stale check. The export cells in this notebook write
# `votes.pq` / `proposals.pq` (parquet), not CSV, and this path hard-codes
# the Decentraland folder instead of using EXPORT_PATH. The recorded output
# shows the file was missing. Consider removing this cell.
!head ../data/Decentraland/proposals.csv

head: cannot open '../data/Decentraland/proposals.csv' for reading: No such file or directory


In [145]:
# Columns kept in the exported proposals table.
dfp_cols = [
    'id',
    'author',
    'date',
    'start',
    'end',
    'platform_proposal',
    'title',
    'description',
]
proposals_file = EXPORT_PATH / 'proposals.pq'
# Refuse to clobber an earlier export.
assert not proposals_file.exists(), "Cant overwrite files"

_proposals_sorted = dfp[dfp_cols].sort_values('date')
_proposals_sorted.to_parquet(proposals_file, index=False)