In [1]:
import dropbox
from pathlib import Path
import os
from tqdm import tqdm
import time
import requests

In [2]:
def download_folder(dbx, folder, local_path, max_retries=3, delay=5):
    """
    Recursively download every file under a Dropbox folder into a local directory.

    dbx: dropbox.Dropbox object instance
    folder: Path to the Dropbox folder
    local_path: Local path where the files will be downloaded
    max_retries: Maximum number of download attempts per file
    delay: Time (in seconds) to wait between attempts after a ReadTimeout

    A file that still times out after max_retries attempts is reported on
    stdout and skipped; nothing is written locally for it. Any other
    exception raised by the Dropbox client propagates to the caller.
    """
    # Create the destination once, up front, so that empty remote folders
    # are mirrored locally as well.
    target_dir = Path(local_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Dropbox paths always use "/" as the separator, whatever the local OS
    # uses, so child paths are built with plain string formatting.
    remote_root = str(folder).rstrip("/")

    # files_list_folder only returns one page of entries; keep following the
    # cursor until the listing is complete.
    result = dbx.files_list_folder(folder)
    entries = list(result.entries)
    while result.has_more:
        result = dbx.files_list_folder_continue(result.cursor)
        entries.extend(result.entries)

    for entry in entries:
        local_file = target_dir / entry.name
        remote_path = "{}/{}".format(remote_root, entry.name)

        # If the entry is a file, download it
        if isinstance(entry, dropbox.files.FileMetadata):
            content = None
            for _attempt in range(max_retries):
                try:
                    _metadata, res = dbx.files_download(remote_path)
                    content = res.content
                    break
                except requests.exceptions.ReadTimeout:
                    # If a ReadTimeout occurs, wait for the delay and retry
                    print("ReadTimeout occurred, waiting for {} seconds before retrying...".format(delay))
                    time.sleep(delay)

            if content is None:
                # Every attempt timed out: report it instead of leaving an
                # empty placeholder file that looks like a finished download.
                print("Giving up on {} after {} attempts".format(remote_path, max_retries))
                continue

            # Only touch the local file once the full payload is in hand
            local_file.write_bytes(content)
        # If the entry is a folder, recurse
        elif isinstance(entry, dropbox.files.FolderMetadata):
            download_folder(dbx, remote_path, str(local_file), max_retries, delay)

In [3]:
# The access token is read from the DROPBOX_ACCESS_TOKEN environment variable
# so that it is never stored in the notebook. The literal token that used to
# be assigned here was committed in plain text and should be revoked.
_dropbox_token = os.environ.get("DROPBOX_ACCESS_TOKEN", "")
if not _dropbox_token:
    print("DROPBOX_ACCESS_TOKEN is not set; export it before running the download cell.")

In [4]:
# Dropbox client used for every download in this cell
dbx = dropbox.Dropbox(_dropbox_token)

# One remote sub-folder per calendar year: "2001" through "2020"
years = list(map(str, range(2001, 2021)))

# Local root for the data: <current working directory>/inputs/US_CRSP_NYSE
local_path = str(Path(os.getcwd()) / "inputs" / "US_CRSP_NYSE")
if not Path(local_path).exists():
    Path(local_path).mkdir(parents=True)

for year in tqdm(years, total=len(years), desc="Downloading CRSP Data"):

    # Remote source folder and matching local destination for this year
    folder = "/US_CRSP_NYSE/Yearly/{}".format(year)
    year_local_path = str(Path(local_path) / year)

    # Create the per-year destination the first time it is needed
    if not Path(year_local_path).exists():
        Path(year_local_path).mkdir(parents=True)

    # Fetch everything under the remote folder into the local one
    download_folder(dbx, folder, year_local_path)

Downloading CRSP Data:  35%|███▌      | 7/20 [47:21<1:27:57, 405.99s/it]


KeyboardInterrupt: 