In [1]:
import pandas as pd

import duckdb

El objetivo de este notebook es crear nuevas clases dataset para distintas DAOs. Vamos a usar el dataset de Andrew en [Kaggle](https://www.kaggle.com/datasets/oiudoiajd/daos-census). Como las tablas son tan grandes (varios GB) no podemos usar `pd.read_parquet`

In [2]:
# Merging all tables into one database
db = duckdb.connect(database=':memory:', read_only=False)
db.execute("CREATE VIEW votes AS SELECT * FROM parquet_scan('{}')".format('data/daos-census/raw/votes.parquet'))
db.execute("CREATE VIEW proposals AS SELECT * FROM parquet_scan('{}')".format('data/daos-census/raw/proposals.parquet'))
db.execute("CREATE VIEW deployments AS SELECT * FROM parquet_scan('{}')".format('data/daos-census/raw/deployments.parquet'))

<duckdb.duckdb.DuckDBPyConnection at 0x7fab80057930>

In [3]:
def get_columns_from_table(table_name):
    return db.execute(f"""
        SELECT column_name, data_type FROM information_schema.columns WHERE table_name='{table_name}'
    """).fetchdf().set_index('column_name')['data_type']

print(get_columns_from_table('votes'))
print(get_columns_from_table('proposals'))

column_name
id                        VARCHAR
proposal_id               VARCHAR
deployment_id             VARCHAR
platform_vote_id          VARCHAR
voter                     VARCHAR
date                    TIMESTAMP
choice                    VARCHAR
weight              DECIMAL(38,4)
Name: data_type, dtype: object
column_name
id                        VARCHAR
deployment_id             VARCHAR
platform_proposal_id      VARCHAR
author                    VARCHAR
date                    TIMESTAMP
votes_count                BIGINT
Name: data_type, dtype: object


In [4]:
db.execute("""
SELECT * FROM deployments
ORDER BY proposals_count DESC
LIMIT 30
""").fetchdf()

Unnamed: 0,id,platform,platform_deployment_id,name,website,additional,votes_count,proposals_count
0,c37abcdd-a36b-51fc-8fa6-2c3ecb780df0,daohaus,0x1b975a9daf25e7b01e0a6c72d657ff74925327a8,DEAD FoundationsDAO,,network: xdai version: 2.0,17738,29681
1,e2ec62ae-9b29-5f3a-a541-58678bb64a1b,snapshot,cakevote.eth,PancakeSwap,https://snapshot.org/#/cakevote.eth,,532830,2744
2,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,snapshot,snapshot.dcl.eth,Decentraland,https://snapshot.org/#/snapshot.dcl.eth,decentraland,116560,1962
3,5deda895-4531-539a-b7dc-88797456ded7,daostack,0xe716ec63c5673b3a4732d22909b38d779fa47c3f,xDXdao,,,5913,1438
4,17dba91a-e68f-50f6-8c1d-2d7fa57ab5fb,snapshot,index-coop.eth,Index Coop,https://snapshot.org/#/index-coop.eth,"indexcoop, https://www.indexcoop.com/",24031,918
5,d3229595-ac4c-55bf-9808-386fc5603bd1,daostack,0x519b70055af55a007110b4ff99b0ea33071c720a,dxDAO,,,2566,918
6,d8a985e4-3cc2-542a-9cb4-3e024577fb3c,realms,GovER5Lthms3bLBqWub97yVrMmEogzX7xNjdXpPPCVZw_G...,UXDProtocol,,,2562,916
7,7bfe40ed-eb25-546d-b5e0-7db022252f49,daohaus,0x7961b6c69ec7d7203cbecc4f3bf30755713a3822,DAOSquare Grants,,network: xdai version: 2.1,702,880
8,51625fff-ef9e-58cd-9dda-0dc68ab75bb2,aragon,0x1c26fde2ce92cd1d932a0afb8367108eac7f369b,,https://etherscan.io/address/0x1c26fde2ce92cd1...,,1331,860
9,4ea69b08-fa7f-5daa-96d5-9515ded0076b,snapshot,gm365.eth,gm DAO,https://snapshot.org/#/gm365.eth,,91546,730


In [5]:
def best_daos_for_recsys(min_proposals=300, min_votes=1000, min_voters=50, min_density=0.0001, order_by='proposals_count', limit=10000):
    df = db.execute(f"""
    WITH G AS (
        SELECT deployments.*, COUNT(DISTINCT votes.voter) AS voters_count
        FROM deployments
        LEFT JOIN votes ON (deployments.id = votes.deployment_id)
        WHERE proposals_count > {min_proposals}
        GROUP BY deployments.*
    )
    SELECT *, 
        voters_count + proposals_count AS nodes, 
        2*(votes_count)/((voters_count + proposals_count)*(voters_count + proposals_count-1)) AS density, 
        votes_count/proposals_count AS vpp, 
        votes_count/voters_count AS vpv
    FROM G
    WHERE density > {min_density} AND voters_count > {min_voters} AND votes_count > {min_votes}
    ORDER BY {order_by} DESC
    LIMIT {limit}
    """).fetchdf()

    df['milli density'] = 1000 * df['density']

    return df

best_daos_for_recsys(min_votes=5000).head(20)

Unnamed: 0,id,platform,platform_deployment_id,name,website,additional,votes_count,proposals_count,voters_count,nodes,density,vpp,vpv,milli density
0,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,snapshot,snapshot.dcl.eth,Decentraland,https://snapshot.org/#/snapshot.dcl.eth,decentraland,116560,1962,7268,9230,0.002737,59.408767,16.037424,2.736675
1,5deda895-4531-539a-b7dc-88797456ded7,daostack,0xe716ec63c5673b3a4732d22909b38d779fa47c3f,xDXdao,,,5913,1438,92,1530,0.005055,4.111961,64.271739,5.055207
2,17dba91a-e68f-50f6-8c1d-2d7fa57ab5fb,snapshot,index-coop.eth,Index Coop,https://snapshot.org/#/index-coop.eth,"indexcoop, https://www.indexcoop.com/",24031,918,2871,3789,0.003349,26.17756,8.370254,3.348631
3,4ea69b08-fa7f-5daa-96d5-9515ded0076b,snapshot,gm365.eth,gm DAO,https://snapshot.org/#/gm365.eth,,91546,730,7711,8441,0.00257,125.405479,11.872131,2.569999
4,5ecb2cca-8b8a-5c67-8867-569e5ca0a8ae,snapshot,maturka.eth,9K DAO,https://snapshot.org/#/maturka.eth,https://t.me/Pro_Deng1,102321,592,8170,8762,0.002666,172.839527,12.52399,2.665863
5,6b341057-a48e-59a4-96d9-eb42e0e4f602,snapshot,balancer.eth,Balancer,https://snapshot.org/#/balancer.eth,"balancer, https://balancer.fi/",111987,509,9107,9616,0.002422,220.013752,12.296805,2.422445
6,6da30872-de18-5f44-92cf-9d9f2556b9c4,snapshot,bancornetwork.eth,Bancor,https://snapshot.org/#/bancornetwork.eth,"bancor, https://bancor.network",30942,500,1092,1592,0.024432,61.884,28.335165,24.432344
7,792aa223-1e66-51a1-bbf1-1c1dc47cd7e8,snapshot,ppyaa.eth,PaoPao,https://snapshot.org/#/ppyaa.eth,,28173,428,550,978,0.05897,65.824766,51.223636,58.969802
8,9563b722-69c8-5d1c-86a3-1cdec1e9a966,snapshot,eth6888.eth,lovebnbdao,https://snapshot.org/#/eth6888.eth,,5045,420,2163,2583,0.001513,12.011905,2.332409,1.512901
9,377e7181-37e0-504d-b494-d26df613abdc,snapshot,aavegotchi.eth,Aavegotchi,https://snapshot.org/#/aavegotchi.eth,"aavegotchi, https://aavegotchi.com",305499,407,12310,12717,0.003778,750.611794,24.817141,3.778371


In [6]:
def best_orgs_for_recsys(min_proposals=300, min_voters=50, min_density=0.0005, order_by='proposals_count', limit=10000):
    df = db.execute(f"""
    WITH G AS (WITH Gv AS (
            SELECT *
            FROM deployments
            LEFT JOIN votes ON (deployments.id = votes.deployment_id)
        )
        SELECT 
            name,
            COUNT(DISTINCT Gv.id) AS n_deploys,
            LIST(DISTINCT Gv.id) AS deploys,
            COUNT(DISTINCT Gv.proposal_id) AS proposals_count,
            COUNT(DISTINCT Gv.voter) AS voters_count,
            COUNT(*) AS votes_count,
        FROM Gv
        GROUP BY Gv.name
    )
    SELECT
        *,
        proposals_count + voters_count AS nodes,
        2*(votes_count)/((voters_count + proposals_count)*(voters_count + proposals_count-1)) AS density, 
        votes_count/proposals_count AS vpp, 
        votes_count/voters_count AS vpv
    FROM G
    WHERE density > {min_density} AND proposals_count > {min_proposals} AND voters_count > {min_voters}
    ORDER BY proposals_count DESC
    LIMIT 100000
    """).fetchdf().dropna()

    df['milli density'] = 1000 * df['density']

    return df

best_orgs_for_recsys()

Unnamed: 0,name,n_deploys,deploys,proposals_count,voters_count,votes_count,nodes,density,vpp,vpv,milli density
0,Decentraland,3,"[7117d5c4-de0e-5e4c-872d-40c3bd4b0200, 36a86d4...",2060,7334,116880,9394,0.002649,56.737864,15.936733,2.649204
1,xDXdao,1,[5deda895-4531-539a-b7dc-88797456ded7],1380,92,5913,1472,0.005462,4.284783,64.271739,5.461569
2,Index Coop,2,"[17dba91a-e68f-50f6-8c1d-2d7fa57ab5fb, a25032f...",874,2871,24032,3745,0.003428,27.496568,8.370603,3.427932
3,dxDAO,1,[d3229595-ac4c-55bf-9808-386fc5603bd1],846,134,2566,980,0.005349,3.033097,19.149254,5.349065
4,Aave,3,"[cff38513-293c-55b1-862f-7b4b4fa29e5f, b6c95a5...",725,77921,2053071,78646,0.000664,2831.822069,26.348109,0.663875
5,gm DAO,3,"[c3a8aa6c-1652-5834-a355-954bc422f1af, 4ea69b0...",710,7712,91548,8422,0.002582,128.940845,11.870851,2.581665
6,9K DAO,1,[5ecb2cca-8b8a-5c67-8867-569e5ca0a8ae],590,8170,102321,8760,0.002667,173.425424,12.52399,2.667081
7,WEALTHDAO,1,[0c40474f-e4a0-540f-ab2f-0451336464f7],585,1041,4008,1626,0.003034,6.851282,3.850144,3.033778
8,MetaCartel Ventures,3,"[b96c148e-6dbe-588c-bc2e-6be4b3fcea80, 899cc74...",579,124,2132,703,0.00864,3.682211,17.193548,8.640219
9,HUWA-DAO,1,[7b930871-5e1b-5efe-af9a-dc5413460604],572,1331,4151,1903,0.002294,7.256993,3.118708,2.293683


In [7]:
def get_deploys_with_name(name: str):
    return db.execute(f"""
    SELECT *
    FROM deployments
    WHERE name='{name}'
    """).fetchdf()

get_deploys_with_name('Decentraland')

Unnamed: 0,id,platform,platform_deployment_id,name,website,additional,votes_count,proposals_count
0,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,snapshot,snapshot.dcl.eth,Decentraland,https://snapshot.org/#/snapshot.dcl.eth,decentraland,116560,1962
1,36a86d4c-690b-5da9-87b5-cf2230328b17,aragon,0xf47917b108ca4b820ccea2587546fbb9f7564b56,Decentraland,https://etherscan.io/address/0xf47917b108ca4b8...,,319,135
2,7117d5c4-de0e-5e4c-872d-40c3bd4b0200,governor,c584e902-7b8f-4fb0-b742-8fa364c94940,Decentraland,,,0,0


In [9]:
# from src.datasets import DAOCensus

# data = DAOCensus("./data/daos-census", 'Decentraland', 'snapshot')
# data[0]

## How many proposals are in total in each platform

In [18]:
db.execute("""
SELECT platform, COUNT(*) AS nproposals
FROM proposals 
LEFT JOIN deployments ON (deployments.id = proposals.deployment_id)
GROUP BY platform
LIMIT 10
""").fetchdf()

Unnamed: 0,platform,nproposals
0,daostack,3571
1,snapshot,124597
2,tally,7542
3,aragon,15238
4,governor,740
5,daohaus,46632
6,realms,9551


## How many votes are in total in each platform

In [20]:
db.execute("""
SELECT platform, COUNT(*) AS nproposals
FROM votes 
LEFT JOIN deployments ON (deployments.id = votes.deployment_id)
GROUP BY platform
LIMIT 10
""").fetchdf()

Unnamed: 0,platform,nproposals
0,daostack,12328
1,tally,555943
2,snapshot,20944233
3,governor,412714
4,aragon,26176
5,daohaus,49454
6,realms,31599
