In [None]:
import requests
import json
import os
import tempfile
import re
import pathlib
import pandas as pd


Please put your API key and Clowder base URL here. Never share or commit a notebook that contains a real API key.

In [None]:
# Credentials and endpoint for the Clowder instance.
# Setting CLOWDER_API_KEY / CLOWDER_URL in the environment overrides the
# fallbacks below, so the notebook can be run without editing this cell.
# NOTE(review): a real API key should not live in a shared notebook; rotate
# this one and replace the fallback with a placeholder.
key = os.environ.get("CLOWDER_API_KEY", "f40d04bd-05cd-4514-a7c4-1b6f7d65099c")
# Base URL of Clowder, without the trailing slash and without "/api"
# (a trailing slash is stripped so "<url>/api" never contains "//").
url = os.environ.get("CLOWDER_URL", "https://clowder.ncsa.illinois.edu/clowder").rstrip("/")

Create some variables we use later

In [None]:
# Header carrying the API key.
base_headers = {'X-API-key': key}
# Header set for the JSON endpoints: the API-key header plus JSON
# content-type / accept entries.
headers = dict(base_headers)
headers['Content-type'] = 'application/json'
headers['accept'] = 'application/json'
# Root of the REST API; endpoint paths are appended to this.
clowder_base_uri = url + "/api"

List all spaces you have edit access to (the `/spaces/canEdit` endpoint)

In [None]:
def list_spaces():
    """Query the ``/spaces/canEdit`` endpoint and tabulate the result.

    The request is sent with the notebook-level ``headers``; an HTTP error
    status is raised by ``raise_for_status()``.

    Returns:
        pandas.DataFrame built from the decoded JSON response.
    """
    endpoint = f"{clowder_base_uri}/spaces/canEdit"
    response = requests.get(endpoint, headers=headers)
    response.raise_for_status()
    payload = response.json()
    return pd.DataFrame.from_dict(payload)

# Show the spaces table; as the cell's last expression the DataFrame is rendered.
list_spaces()

Unnamed: 0,id,name,description,created
0,63c8414ce4b083a8c2580991,dt1,trial,Wed Jan 18 18:58:20 UTC 2023


List out all datasets

In [None]:
def list_datasets():
    """Query the ``/datasets`` endpoint and tabulate the result.

    The request is sent with the notebook-level ``headers``; an HTTP error
    status is raised by ``raise_for_status()``.

    Returns:
        pandas.DataFrame built from the decoded JSON response.
    """
    endpoint = f"{clowder_base_uri}/datasets"
    response = requests.get(endpoint, headers=headers)
    response.raise_for_status()
    payload = response.json()
    return pd.DataFrame.from_dict(payload)

# Show the datasets table; as the cell's last expression the DataFrame is rendered.
list_datasets()

Unnamed: 0,id,name,description,created,thumbnail,authorId,spaces,resource_type
0,63c84b8ae4b083a8c2580a41,dataset 2,,Wed Jan 18 19:42:02 UTC 2023,,635bfd4db7c7f35aca68df66,[63c8414ce4b083a8c2580991],dataset
1,63c84b5be4b083a8c2580a26,dataset1,,Wed Jan 18 19:41:15 UTC 2023,,635bfd4db7c7f35aca68df66,[63c8414ce4b083a8c2580991],dataset


List out all files in a particular dataset.
It takes datasetId as parameter.

In [None]:
def list_files_in_dataset(datasetId):
    """Query ``/datasets/<datasetId>/listAllFiles`` and tabulate the result.

    Args:
        datasetId: id of the dataset, as a string (it is concatenated into
            the request path).

    Returns:
        pandas.DataFrame built from the decoded JSON response.

    An HTTP error status is raised by ``raise_for_status()``.
    """
    endpoint = ''.join([clowder_base_uri, '/datasets/', datasetId, '/listAllFiles'])
    response = requests.get(endpoint, headers=headers)
    response.raise_for_status()
    payload = response.json()
    return pd.DataFrame.from_dict(payload)

# Example: list the files of one dataset, using an id string from this Clowder instance.
list_files_in_dataset('63c84b8ae4b083a8c2580a41')

Unnamed: 0,size,date-created,id,contentType,filename
0,57,Wed Jan 18 19:42:12 UTC 2023,63c84b94e4b083a8c2580a45,text/plain,a2.txt


Retrieves the url to download the contents of a dataset as zip. It takes datasetId as parameter.
Returns:

*   url - url to download from
*   filename - zip file name



In [None]:
def get_dataset_download_link(datasetId):
    """Build the zip-download link for a dataset.

    The dataset's metadata is fetched first so the archive can be named
    after the dataset.

    Args:
        datasetId: id of the dataset.

    Returns:
        dict with two entries:
            "url"      - the dataset's ``/download`` endpoint with the
                         bagit/compression/tracking query options. The API
                         key is NOT embedded in this URL, so whoever fetches
                         it has to authenticate separately.
            "filename" - the dataset's name followed by ".zip".

    An HTTP error status on the metadata request is raised by
    ``raise_for_status()``.
    """
    # get dataset name
    r = requests.get(f"{clowder_base_uri}/datasets/{datasetId}", headers=headers)
    r.raise_for_status()
    filename = r.json()["name"] + ".zip"
    # Single "/" before "download"; named download_url so the notebook-level
    # `url` (the Clowder base URL) is not shadowed.
    download_url = f"{clowder_base_uri}/datasets/{datasetId}/download?bagit=false&compression=-1&tracking=true"
    return {"url": download_url, "filename": filename}


# Example: build the zip-download link for one dataset; the returned dict is rendered.
get_dataset_download_link('63c84b8ae4b083a8c2580a41')

{'id': '63c84b8ae4b083a8c2580a41', 'name': 'dataset 2', 'description': '', 'created': 'Wed Jan 18 19:42:02 UTC 2023', 'thumbnail': None, 'authorId': '635bfd4db7c7f35aca68df66', 'spaces': ['63c8414ce4b083a8c2580991'], 'resource_type': 'dataset'}


{'url': 'https://clowder.ncsa.illinois.edu/clowder/api/datasets/63c84b8ae4b083a8c2580a41//download?bagit=false&compression=-1&tracking=true',
 'filename': 'dataset 2.zip'}


Retrieves the url to download a file. It takes fileId as parameter.

Returns:



*   url - url to download from

*   filename - original filename
*   bytes - size of the file




In [None]:
def get_file_download_link(fileId):
    """Build the download link for a single file.

    The file's metadata is fetched to obtain its original filename and size.

    Args:
        fileId: id of the file.

    Returns:
        dict with three entries:
            "url"      - the file's ``/blob`` endpoint with the API key
                         appended as ``?key=...``. Treat this URL as a secret.
            "filename" - the "filename" field of the metadata.
            "bytes"    - the "size" field of the metadata, passed through
                         unchanged.

    An HTTP error status on the metadata request is raised by
    ``raise_for_status()``.
    """
    # get file name
    # Authenticate with the shared header set (as the other requests in this
    # notebook do) instead of putting the key into the request URL.
    r = requests.get(f"{clowder_base_uri}/files/{fileId}/metadata", headers=headers)
    r.raise_for_status()
    metadata = r.json()  # decode the body once and reuse it
    filename = metadata["filename"]
    bloburl = f"{clowder_base_uri}/files/{fileId}/blob?key={key}"
    return {"url": bloburl, "filename": filename, "bytes": metadata["size"]}

# Example: build the download link for one file; the returned dict is rendered.
# NOTE: the rendered URL contains the API key, so clear this output before sharing.
get_file_download_link('63c84b73e4b083a8c2580a2a')

{'url': 'https://clowder.ncsa.illinois.edu/clowder/api/files/63c84b73e4b083a8c2580a2a/blob?key=f40d04bd-05cd-4514-a7c4-1b6f7d65099c',
 'filename': 'a1.txt',
 'bytes': '55'}

Download dataset/file to local disk.

Parameters:

*   url - url to download from

*   inputfilename - optional path of the file to write; if omitted, a temporary file is created
*   chunk_size - size in bytes of each chunk read from the response while streaming; the file/dataset is written to disk chunk by chunk (default 10 KiB)


In [None]:
def download_to_disk(url, inputfilename = None, chunk_size = 10 * 1024, headers = None):
    """Stream the content at ``url`` into a local file and return its path.

    Args:
        url: URL to download from.
        inputfilename: path of the file to write. When None, a temporary
            file with a ".txt" suffix is created via ``tempfile.mkstemp``.
            An existing file at this path is overwritten.
        chunk_size: chunk size passed to ``iter_content`` while streaming
            the response to disk.
        headers: optional dict of HTTP headers for the request (e.g. an
            ``X-API-key`` header for URLs that do not embed the key).
            Defaults to None, i.e. no extra headers.

    Returns:
        The path of the written file: the ``mkstemp`` path, or
        ``inputfilename`` joined onto the current working directory.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status
        (no file is created in that case), and any error raised while
        opening or writing the file. If writing fails, the partially
        written file is removed before the error propagates.
    """
    # The context manager releases the streamed connection even on failure.
    with requests.get(url, headers=headers, stream=True) as result:
        # Check the status before touching the disk so an HTTP error page is
        # never saved as if it were the requested payload.
        result.raise_for_status()

        if inputfilename is None:
            (inputfile, inputfilename) = tempfile.mkstemp(suffix='.txt')
        else:
            # O_TRUNC: without it, overwriting a longer existing file would
            # leave the old file's trailing bytes after the new content.
            inputfile = os.open(inputfilename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            current_dir = os.getcwd()
            inputfilename = os.path.join(current_dir, inputfilename)
        try:
            with os.fdopen(inputfile, "wb") as outputfile:
                for chunk in result.iter_content(chunk_size):
                    outputfile.write(chunk)
            return inputfilename
        except Exception:
            # Do not leave a partial download behind.
            os.remove(inputfilename)
            raise

# Example: resolve the blob URL and original filename of one file, then stream
# it to disk under that filename (relative to the current working directory)
# in 10 KiB chunks. The returned path is rendered as the cell output.
result = get_file_download_link('63c84b73e4b083a8c2580a2a')
download_to_disk(result["url"], result["filename"], 10 * 1024)

'/content/a1.txt'