This notebook runs everything necessary for my TFM

In [87]:
import os
from subprocess import call

import requests
import papermill as pm

def run_notebook(path, **kwargs):
    pm.execute_notebook(path, path, progress_bar=True, cwd=os.path.dirname(path), **kwargs)

def run_file(path):
    call(['python', path])

PLATFORMS = [
    'aragon',
    'daohaus',
    'daostack',
    # 'governor',
    # 'realms',
    'snapshot',
    # 'tally',
]

# Deployments

Getting the deployments

In [88]:
def get_files_from_daoanalyzer(org, dst_path, download_files):
    # Zenodo ID
    zenodo_id = 10372368
    zenodo_dotfile = os.path.join(dst_path, org, '.zenodo-id.txt')

    assert download_files, "download_files cant be empty"
    all_files_available = all( ( os.path.exists(os.path.join(dst_path, org, f)) for f in download_files ) )
    old_zenodo_id = -1

    if os.path.exists(zenodo_dotfile):
        with open(zenodo_dotfile, 'r') as f:
            old_zenodo_id = int(f.readline())

    if not all_files_available or zenodo_id != old_zenodo_id:
        # Download from Zenodo (no API key needed)
        url = f"https://zenodo.org/records/{zenodo_id}/files/archive.zip?download=1"

        archive = f'./DATA/archive-{zenodo_id}.zip'
        if not os.path.exists(archive):
            r = requests.get(url)
            r.raise_for_status()

            with open(archive, 'wb') as f:
                f.write(r.content)

        call(['unzip', '-oj', archive, *[f'{org}/{f}' for f in download_files], '-d', os.path.join(dst_path, org)])
        with open(zenodo_dotfile, 'w') as f:
            f.write(str(zenodo_id))
    else:
        print('All files already present')

## Aragon

In [89]:
get_files_from_daoanalyzer('aragon', 'deployments', ['organizations.csv', 'casts.csv', 'votes.csv'])
run_notebook('./deployments/aragon/aragon_get_deployments.ipynb')

All files already present


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]



## DAOhaus

In [90]:
get_files_from_daoanalyzer('daohaus', 'deployments', ['moloches.csv', 'votes.csv', 'proposals.csv'])
run_notebook('./deployments/daohaus/daohaus_get_deployments.ipynb')

All files already present


Executing:   0%|          | 0/25 [00:00<?, ?cell/s]



## DAOstack

In [91]:
get_files_from_daoanalyzer('daostack', 'deployments', ['daos.csv', 'proposals.csv', 'votes.csv'])
run_notebook('./deployments/daostack/daostack_get_deployments.ipynb')

All files already present


Executing:   0%|          | 0/19 [00:00<?, ?cell/s]



## Snapshot

In [92]:
run_notebook('./deployments/snapshot/snapshot_get_deployments.ipynb')

Executing:   0%|          | 0/8 [00:00<?, ?cell/s]



## Combine and analyze

In [93]:
run_notebook('./deployments/combine_and_analyze.ipynb')

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]



# Proposals

## Aragon

In [94]:
get_files_from_daoanalyzer('aragon', './proposals/', ['votes.csv'])
run_notebook('./proposals/aragon/get_aragon_proposals.ipynb')

All files already present


Executing:   0%|          | 0/8 [00:00<?, ?cell/s]



## DAOhaus

In [95]:
get_files_from_daoanalyzer('daohaus', './proposals/', ['proposals.csv'])
run_notebook('./proposals/daohaus/get_daohaus_proposals.ipynb')

All files already present


Executing:   0%|          | 0/8 [00:00<?, ?cell/s]



## DAOstack

In [96]:
get_files_from_daoanalyzer('daostack', './proposals/', ['proposals.csv', 'votes.csv'])
run_notebook('./proposals/daostack/get_daostack_proposals.ipynb')

All files already present


Executing:   0%|          | 0/13 [00:00<?, ?cell/s]



## Governor

In [97]:
# run_notebook('./proposals/governor/get_proposals.ipynb')

## Snapshot

In [98]:
run_notebook('./proposals/snapshot/download_all_proposals_new.ipynb')
run_notebook('./proposals/snapshot/process_snapshot_proposals.ipynb')

Executing:   0%|          | 0/12 [00:00<?, ?cell/s]



Executing:   0%|          | 0/12 [00:00<?, ?cell/s]



# Votes

## Aragon

In [99]:
get_files_from_daoanalyzer('aragon', './votes/', ['casts.csv'])
run_notebook('./votes/aragon/get_aragon_votes.ipynb')

All files already present


Executing:   0%|          | 0/5 [00:00<?, ?cell/s]



## DAOhaus

In [100]:
get_files_from_daoanalyzer('daohaus', './votes/', ['votes.csv'])
run_notebook('./votes/daohaus/get_daohaus_votes.ipynb')

All files already present


Executing:   0%|          | 0/8 [00:00<?, ?cell/s]



## DAOstack

In [101]:
get_files_from_daoanalyzer('daostack', './votes/', ['votes.csv'])
run_notebook('./votes/daostack/get_daostack_votes.ipynb')

All files already present


Executing:   0%|          | 0/4 [00:00<?, ?cell/s]



## Snapshot

To check progress:
- `audit_download.ipynb` shows `get_snapshot_votes.ipynb` progress. There should be 6798 files in the `votes_out` folder
- `audit_large_proposal_votes.ipynb` shows `get_large_proposal_votes.ipynb` progress. Look for one of the last cells that says "Total progress"

In [102]:
run_notebook('./votes/snapshot/get_snapshot_votes.ipynb')
run_notebook('./votes/snapshot/get_large_proposal_votes.ipynb')
# This takes quite some time, so we will run it only if necessary
if not os.path.exists('./votes/snapshot/snapshot_votes_small_proposals.csv'):
    run_notebook('./votes/snapshot/process_snapshot_votes.ipynb', log_output=True)

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]



Executing:   0%|          | 0/8 [00:00<?, ?cell/s]



# Concatenate data to "parquets"

In [105]:
if not os.path.exists('./parquets'):
    os.mkdir('./parquets')
run_notebook('./concatenate_DATA.ipynb', log_output=True)

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]



PapermillExecutionError: 
---------------------------------------------------------------------------
Exception encountered at "In [11]":
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[11], line 3
      1 import glob
----> 3 snapshot_votes = glob.glob('/users/personal/Downloads/DATA_onedrive_2023-07-25/votes/snapshot_*.parquet')[0]
      4 snapshot_votes

IndexError: list index out of range


In [106]:
import pandas as pd

pd.read_pickle("./votes/snapshot/large_proposals_and_votes.pkl")

[('0xda4f201a37ea08cf1892418e7b9e88f5687a68dbdc96c3ab22abaa1c7244648e',
  2765849),
 ('0xc18c4911e19adf4bd54c6ad142bb08bc81fd44dee723e520c511c2a9414d5920',
  514470),
 ('0xcfc335d45421a2d3b5f64d5ee19770ab103f539cedc240ba937691093b392bc4',
  510861),
 ('0xd6bb6427e285ae4e269ebb9f4d0396808d2de356c654c19877402da4e3e8c44a',
  510523),
 ('0x2eff53c9e9f0bf19a98345d1d32103ec0ec859d81978eecacb38db16be33b94b',
  472911),
 ('0xed5fe9d40c9e82a53c548d4458fdca91d6d26c12cc34303c205906b5e1854e29',
  470709),
 ('0x3ab405d7d396f0b3bf04b966dd12b06ad5e81d3cf583eda8b890f2d744a98693',
  470250),
 ('0xe8e92474bbd4ad0aa011cf69ad1873a9e30ab9c431f717e9d859a78fdd799a3a',
  456653),
 ('0x6078a960e2cb6b518dddc3773f7f40b30ab76bc6f2f3de2022eaafe78c8ee0a1',
  456232),
 ('0x73917ac1c09063a4776aea60176817f3aa3ee96b3a54f93ba9e9dddfc1add2ee',
  455835),
 ('0xf1274081c9f4db1db77f30f21b53f61dde8716f8d3f2aac7367709bd4af369b2',
  450500),
 ('0x3cb9430062af89937887e9d359e4765de4f138c6bcea49ee66225a8fa998e97b',
  449253),
 ('

# Clean the data

In [None]:
run_notebook('./parquet_versions/parquet_cleanup/remove_parquet_duplicate_deployments.ipynb')

run_notebook('./parquet_versions/parquet_cleanup/v3_add_deployment_uuids.ipynb')
run_notebook('./parquet_versions/parquet_cleanup/v3_add_proposal_uuids.ipynb')

# TODO: Now is just the necessary so proposals-text works, but I should clean also the deployments
# and votes parquets
run_notebook('./parquet_versions/parquet_cleanup/v6_fix_date_fields_proposals.ipynb')