In [47]:
import json
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import os

# Silence only urllib3's InsecureRequestWarning; every other warning category stays visible.
# The requests in this notebook are sent with verify=IS_CERT, which the configuration cell
# sets to False, so this warning would otherwise be repeated on each HTTPS call.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [64]:
# Configuration
# The notebook talks to CKAN directly on its standard Docker port 5000 (CKAN_URL), not through
# Nginx on port 8443. A dataset might have been created from elsewhere (e.g. via localhost), in
# which case its resource URLs start with the external address (CKAN_URL_EXT);
# download_dataset() swaps that prefix for CKAN_URL before fetching.
CKAN_URL = 'http://ckan:5000'
CKAN_URL_EXT = 'https://localhost:8443'
# Never store an API token in the notebook itself: it ends up in version control and in every
# shared copy. Export CKAN_API_KEY before starting the kernel; the value falls back to an
# empty string when the variable is unset.
API_KEY = os.environ.get('CKAN_API_KEY', '')
# Passed as `verify=` to the requests calls; False turns TLS certificate verification off.
IS_CERT = False
DOWNLOAD_PATH = 'work/download'

In [65]:
# Function to create a dataset
def create_dataset(ckan_url, api_key, dataset_dict):
    """Create a dataset by POSTing to CKAN's ``package_create`` action.

    Args:
        ckan_url: Base URL of the CKAN instance (no trailing slash).
        api_key: Token sent verbatim in the ``Authorization`` header.
        dataset_dict: Dataset metadata, sent as the JSON request body.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = f"{ckan_url}/api/action/package_create"
    auth_headers = {'Authorization': api_key}
    reply = requests.post(endpoint, headers=auth_headers, json=dataset_dict, verify=IS_CERT)
    return reply.json()

def upload_resource(ckan_url, api_key, dataset_id, resource_path):
    """Upload a local file as a new resource via CKAN's ``resource_create`` action.

    Args:
        ckan_url: Base URL of the CKAN instance (no trailing slash).
        api_key: Token sent verbatim in the ``Authorization`` header.
        dataset_id: Value sent as ``package_id``, i.e. the dataset receiving the resource.
        resource_path: Path of the local file to upload; its base name becomes the
            resource name.

    Returns:
        The response body decoded from JSON.
    """
    headers = {'Authorization': api_key}
    data = {
        'package_id': dataset_id,
        'name': os.path.basename(resource_path)  # File name without its directories
    }
    # Open the file in a context manager so the handle is closed as soon as the request
    # has finished, instead of leaking until garbage collection.
    with open(resource_path, 'rb') as resource_file:
        response = requests.post(
            f"{ckan_url}/api/action/resource_create",
            headers=headers,
            files={'upload': resource_file},
            data=data,
            verify=IS_CERT,
        )
    return response.json()

# Function to download a dataset
def download_dataset(ckan_url, ckan_url_ext, dataset_id, download_folder):
    """Download every resource of a dataset into ``download_folder``.

    The dataset is looked up with CKAN's ``package_show`` action. In each resource URL the
    ``ckan_url_ext`` prefix is replaced by ``ckan_url`` before the file is fetched.

    Args:
        ckan_url: Base URL used to reach CKAN from this notebook.
        ckan_url_ext: Base URL that may appear in stored resource URLs.
        dataset_id: Dataset name or ID passed as the ``id`` query parameter.
        download_folder: Target directory; created if it does not exist.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Let requests build the query string so the identifier is URL-encoded.
    response = requests.get(
        f"{ckan_url}/api/action/package_show",
        params={'id': dataset_id},
        verify=IS_CERT,
    )
    if response.status_code != 200:
        print("Failed to download dataset.")
        return

    dataset = response.json()['result']
    os.makedirs(download_folder, exist_ok=True)

    used_names = set()  # File names already written during this call
    for resource in dataset['resources']:
        current_res_url = resource['url'].replace(ckan_url_ext, ckan_url)
        print(f"Downloading resource {current_res_url}")
        download = requests.get(current_res_url, verify=IS_CERT)
        if download.status_code != 200:
            # Do not save an error page as if it were the resource's content.
            print(f"Failed to download resource {current_res_url} (HTTP {download.status_code}).")
            continue

        # Keep only the base name so a resource name cannot point outside download_folder;
        # fall back to the resource ID when the name is missing or empty.
        file_name = os.path.basename(resource.get('name') or '') or resource['id']
        if file_name in used_names:
            # Several resources may share one name; prefix the ID instead of overwriting.
            file_name = f"{resource['id']}_{file_name}"
        used_names.add(file_name)

        file_path = os.path.join(download_folder, file_name)
        with open(file_path, 'wb') as f:
            f.write(download.content)
    print("Download completed.")

def get_dataset_id(ckan_url, api_key, dataset_name):
    """Return the ID of the dataset called ``dataset_name``, or ``None``.

    Queries CKAN's ``package_show`` action with the token in the ``Authorization``
    header. Any status code other than 200 yields ``None``.
    """
    lookup = requests.get(
        f"{ckan_url}/api/action/package_show?id={dataset_name}",
        headers={'Authorization': api_key},
        verify=IS_CERT,
    )
    if lookup.status_code != 200:
        return None
    return lookup.json()['result']['id']

In [66]:
# Fetch the organization list and use the first entry as the owner of the example dataset.
response = requests.get(f"{CKAN_URL}/api/3/action/organization_list", verify=IS_CERT)
organizations = response.json()["result"]
if not organizations:
    # Without this guard organizations[0] fails with a bare IndexError that hides the cause.
    raise RuntimeError("No organizations found in CKAN; create one before running this notebook.")
response = requests.get(f"{CKAN_URL}/api/3/action/organization_show?id={organizations[0]}", verify=IS_CERT)
organization_id = response.json()["result"]["id"]

In [67]:

# Example usage
example_dataset = {
    'name': 'example-dataset',
    'title': 'Example Dataset',
    'notes': 'This is an example dataset.',
    'owner_org': organization_id  # ID of the organization looked up in the previous cell
    # Add other required fields and additional metadata as needed
}

# Check if dataset exists
dataset_name = example_dataset['name']
dataset_id = get_dataset_id(CKAN_URL, API_KEY, dataset_name)

# If dataset does not exist, create it
if not dataset_id:
    create_response = create_dataset(CKAN_URL, API_KEY, example_dataset)
    print(create_response)

    # The response body carries a 'success' flag; stop with a readable message instead of
    # hitting a KeyError on ['result'] when the creation was rejected.
    if not create_response.get('success'):
        raise RuntimeError(
            f"Could not create dataset '{dataset_name}': {create_response.get('error')}"
        )
    dataset_id = create_response['result']['id']
else:
    print(f"Dataset '{dataset_name}' already exists with ID {dataset_id}.")

Dataset 'example-dataset' already exists with ID c131236b-3b29-49c6-8ea5-f0695df3f41b.


In [68]:
import ipywidgets as widgets

# Upload widget limited to one file at a time (multiple=False).
# Set `accept` to e.g. '.txt' or '.pdf' to restrict the selectable file types.
uploader = widgets.FileUpload(accept='', multiple=False)
display(uploader)

FileUpload(value=(), description='Upload')

In [69]:
# Check if a file has been uploaded
if uploader.value:
    # Get the uploaded file; this method works with multiple=False
    uploaded_file = next(iter(uploader.value))
    # Create the target folder if needed so open() does not fail on a fresh checkout.
    os.makedirs("work/upload", exist_ok=True)
    # basename() drops any directory part of the client-supplied name, so the file always
    # lands inside work/upload.
    file_path = f"work/upload/{os.path.basename(uploaded_file['name'])}"
    with open(file_path, "wb") as fp:
        fp.write(uploaded_file.content)
else:
    print('uploader empty')

In [70]:
# Send the file saved by the upload cell to CKAN and show the raw API response.
upload_response = upload_resource(
    ckan_url=CKAN_URL,
    api_key=API_KEY,
    dataset_id=dataset_id,
    resource_path=file_path,
)
print(upload_response)



{'help': 'https://localhost:8443/api/3/action/help_show?name=resource_create', 'success': True, 'result': {'cache_last_updated': None, 'cache_url': None, 'created': '2024-01-30T23:55:39.934549', 'datastore_active': False, 'description': None, 'format': 'XML', 'hash': '', 'id': 'a97a8f91-c87c-4f13-b929-a93e2351de4a', 'last_modified': '2024-01-30T23:55:39.911174', 'metadata_modified': '2024-01-30T23:55:39.930602', 'mimetype': 'text/xml', 'mimetype_inner': None, 'name': 'test-dataset.xml', 'package_id': 'c131236b-3b29-49c6-8ea5-f0695df3f41b', 'position': 7, 'resource_type': None, 'size': 2021, 'state': 'active', 'url': 'https://localhost:8443/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/a97a8f91-c87c-4f13-b929-a93e2351de4a/download/test-dataset.xml', 'url_type': 'upload'}}


In [71]:
# Download a dataset
print(CKAN_URL)
download_dataset(CKAN_URL, CKAN_URL_EXT, 'example-dataset', DOWNLOAD_PATH)

http://ckan:5000
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/4c5dd34a-d53c-470c-b5d6-c1d4347b9be7/download/test_csv.csv
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/594b7f7b-7f58-409d-a625-298b9d2cb17c/download/test_csv.csv
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/ce2ba12a-a683-43ff-9850-99646df8d348/download/test_csv.csv
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/0fe8099c-12aa-4b80-8c8f-8ba0ec4a9e8e/download/test_csv.csv
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/5e977e09-92d9-4f55-8481-153e41b683bc/download/test_csv.csv
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f0695df3f41b/resource/132e6f60-72db-49c6-a956-67e9b27737f8/download/test-dataset.xml
Downloading resource http://ckan:5000/dataset/c131236b-3b29-49c6-8ea5-f06