In [None]:
import hoss
import os
import tempfile

import hoss.tools.download

## Connect to local server
This notebook demonstrates how to use the download tool that is included in the hoss client library.

For these demo notebooks, it's assumed you're running against the system running in
dev mode and able to connect to localhost.

We start by connecting to the "local" server. If you are using a different server, be sure to change the `.connect()` argument.

In [None]:
# Root URL of the local dev-mode server; point this elsewhere to use another server
server_local = hoss.connect("http://localhost")

In [None]:
# Heading on the first line, the server's namespace listing on the second
print("Existing Namespaces:", server_local.list_namespaces(), sep="\n")

## Create a dataset
First load the `default` namespace, then create a dataset inside it.

In [None]:
# Fetch the namespace named "default"; the demo dataset is created inside it
ns = server_local.get_namespace("default")

In [None]:
# Dataset that the following cells fill with files and then download from
ds = ns.create_dataset(
    "download-test",
    "A dataset for a download tool example",
)

## Write test data to download

Write a bunch of data into the dataset we just created. Also create a temporary directory to download the data into.

In [None]:
# Local scratch directory that the download cells write into
temp_dir = tempfile.TemporaryDirectory()

# Every test file is this 10-character chunk repeated N times
payload_chunk = '1234567890'

# 50 small files (10,000 characters each) at the root of the dataset
for idx in range(50):
    (ds / f"file{idx}.txt").write_text(payload_chunk * 1000)

# A nested directory structure with a few bigger files (50-80 million characters).
# Each entry is (path components below the dataset root, repeat count).
big_files = [
    (("folder1", "file-50.txt"), 8000000),
    (("folder1", "file-51.txt"), 6000000),
    (("folder1", "file-52.txt"), 5000000),
    (("folder1", "subfolder2", "file-53.txt"), 8000000),
    (("folder1", "subfolder2", "file-54.txt"), 8000000),
]
for parts, repeats in big_files:
    # Build the target one component at a time with the dataset's `/` operator
    target = ds
    for part in parts:
        target = target / part
    target.write_text(payload_chunk * repeats)

## Run download tool

You can run the download tool as a function that even works in Jupyter.

You can also run the download tool from the command line. When you pip install the hoss client library, the program `hoss` is installed. The format of the command line interface is:

`hoss download <dataset name> <namespace name> <prefix> <absolute path to the download dir>`

In [1]:
# Shell out to the installed `hoss` program and show the help text of its download subcommand
!hoss download -h

Usage: hoss download [OPTIONS] DATASET NAMESPACE PREFIX DESTINATION

  Download files to a local directory from a prefix in a Dataset

  DATASET is the name of the dataset from which to download data

  NAMESPACE is the name of the namespace that contains the Dataset

  PREFIX is the prefix inside the dataset to download. Use "/" to indicate the
  root of the dataset.

  DESTINATION is the local directory to write files to

Options:
  -e, --endpoint TEXT            Hoss server root endpoint  [default:
                                 http://localhost]
  -r, --recursive                If set, download all files with the prefix.
                                 Otherwise, only download files at the same
                                 level as the prefix, assuming a `/` delimiter
                                 in the keys to represent 'directories'
                                 [default: False]
  -c, --max_concurrency INTEGER  max concurrency used when analyzing the
               

In [None]:
# Recursively download everything under the dataset root prefix ("/") into temp_dir
hoss.tools.download.download_prefix(
    ds.dataset_name,
    ns.name,
    "/",
    temp_dir.name,
    server_local.base_url,
    recursive=True,
    max_concurrency=10,
    num_processes=1,
)

In [None]:
# Try downloading just one folder of data: a non-recursive download of the
# "/folder1" prefix into its own scratch directory
temp_dir2 = tempfile.TemporaryDirectory()
hoss.tools.download.download_prefix(
    ds.dataset_name,
    ns.name,
    "/folder1",
    temp_dir2.name,
    server_local.base_url,
    recursive=False,
)

## Clean up this example
Run these cells to remove the resources created during the test

In [None]:
# Remove the temporary directory (and its contents) that received the full-dataset download
temp_dir.cleanup()

In [None]:
# Remove the temporary directory (and its contents) that received the "/folder1" download
temp_dir2.cleanup()

In [None]:
# Delete the "download-test" dataset that this notebook created in the namespace
ns.delete_dataset("download-test")