# HDX Python API

This notebook is a little documentation on the hdx python API. Feel free to edit and add your insights!

[The Humanitarian Data Exchange (HDX)](https://data.humdata.org/) makes use of [CKAN tool](https://docs.ckan.org/en/2.9/user-guide.html#what-is-ckan) to publish its data.

- Documentation: https://hdx-python-api.readthedocs.io/en/latest/
- API: https://docs.ckan.org/en/2.9/api/legacy-api.html?highlight=search#search-resources

The code snippets here explores the hdx python api and "Dataset" object returned by it.

The notebook makes use of only one Dataset referenced by 'kenya-who-is-doing-what-and-where-2017' for the exploration.

In [27]:
# Installation of the api
%pip install hdx-python-api==5.9.3 

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [28]:
# Importing required libraraies from hdx
from hdx.utilities.easy_logging import setup_logging
from hdx.api.configuration import Configuration
from hdx.data.dataset import Dataset

# Logging in and getting read only access to https://data.humdata.org
# Read more about API Access token in documentation at https://hdx-python-api.readthedocs.io/en/latest/
# for writing access.

setup_logging()

Configuration.create(hdx_site="prod", user_agent="A_Quick_Example", hdx_read_only=True)

ConfigurationError: Configuration already created!

In [None]:
# Looking at class attributes for Database

print(type(Dataset))
Dataset.__dict__.keys()

<class 'abc.ABCMeta'>


dict_keys(['__module__', '__doc__', 'max_attempts', 'max_int', 'update_frequencies', '__init__', 'actions', '__setitem__', 'separate_resources', 'unseparate_resources', 'get_dataset_dict', 'save_to_json', 'load_from_json', 'init_resources', '_get_resource_from_obj', 'add_update_resource', 'add_update_resources', 'delete_resource', 'get_resources', 'get_resource', 'number_of_resources', 'reorder_resources', 'update_from_yaml', 'update_from_json', 'read_from_hdx', '_dataset_create_resources', '_dataset_load_from_hdx', 'check_required_fields', 'revise', '_save_dataset_add_filestore_resources', '_dataset_merge_hdx_update', 'update_in_hdx', 'create_in_hdx', 'delete_from_hdx', 'hxl_update', 'search_in_hdx', 'get_all_dataset_names', 'get_all_datasets', 'get_all_resources', 'autocomplete', 'get_date_of_dataset', 'set_date_of_dataset', 'set_dataset_year_range', 'list_valid_update_frequencies', 'transform_update_frequency', 'get_expected_update_frequency', 'set_expected_update_frequency', 'get_t

In [None]:
hdx_dataset_key = 'kenya-who-is-doing-what-and-where-2017'

# Reading the dataset from hdx
hdx_dataset = Dataset.read_from_hdx(hdx_dataset_key)

# print(hdx_dataset)

# Looking at class attributes for dataset class 
print(type(hdx_dataset))
hdx_dataset.__dict__.keys()


<class 'hdx.data.dataset.Dataset'>


dict_keys(['resources', 'old_data', 'configuration', 'data', 'preview_resourceview'])

In [None]:
# Extracting resources from the dataset
hdx_resources = hdx_dataset.resources

# print(hdx_resources)

# Looking at class attributes for resources 
print(type(hdx_resources))


<class 'list'>


In [None]:
# Extracting data from the dataset

hdx_data = hdx_dataset.data

# print(hdx_data)
print(hdx_data.keys())

# Looking at class attributes for data 
print(type(hdx_data))

dict_keys(['archived', 'creator_user_id', 'data_update_frequency', 'dataset_date', 'dataset_preview', 'dataset_source', 'has_geodata', 'has_quickcharts', 'has_showcases', 'id', 'is_requestdata_type', 'isopen', 'last_modified', 'license_id', 'license_title', 'license_url', 'maintainer', 'metadata_created', 'metadata_modified', 'methodology', 'name', 'notes', 'num_resources', 'num_tags', 'organization', 'owner_org', 'package_creator', 'pageviews_last_14_days', 'private', 'qa_checklist', 'qa_completed', 'review_date', 'solr_additions', 'state', 'subnational', 'title', 'total_res_downloads', 'type', 'updated_by_script', 'url', 'version', 'groups', 'tags', 'relationships_as_subject', 'relationships_as_object', 'is_fresh', 'update_status', 'x_resource_grouping'])
<class 'dict'>


In [None]:
# Extracting old data from the dataset
hdx_old_data = hdx_dataset.old_data

# print(hdx_old_data)

# Looking at class attributes for old data 
print(type(hdx_old_data))

<class 'dict'>


In [None]:
# Extracting configuration data from the dataset
hdx_configuration = hdx_dataset.configuration

# print(hdx_configuration)

# Looking at class attributes for configuration 
print(type(hdx_configuration))

ordereddict([('hdx_prod_site', ordereddict([('url', 'https://data.humdata.org')])), ('hdx_demo_site', ordereddict([('url', 'https://demo.data-humdata-org.ahconu.org')])), ('hdx_stage_site', ordereddict([('url', 'https://stage.data-humdata-org.ahconu.org')])), ('hdx_feature_site', ordereddict([('url', 'https://feature.data-humdata-org.ahconu.org')])), ('hdx_dev_site', ordereddict([('url', 'https://dev.data-humdata-org.ahconu.org')])), ('dataset', ordereddict([('required_fields', ['name', 'private', 'title', 'notes', 'dataset_source', 'owner_org', 'maintainer', 'dataset_date', 'data_update_frequency', 'groups', 'license_id', 'methodology', 'tags'])])), ('dataset-requestable', ordereddict([('required_fields', ['name', 'title', 'notes', 'dataset_source', 'owner_org', 'maintainer', 'dataset_date', 'data_update_frequency', 'groups', 'tags', 'field_names', 'file_types', 'num_of_rows'])])), ('resource', ordereddict([('required_fields', ['package_id', 'name', 'format', 'description', 'url_type'

In [None]:
# Extracting preview data from the dataset
hdx_preview_resourceview = hdx_dataset.preview_resourceview

# print(hdx_preview_resourceview)

# Looking at class attributes for preview resource view 
print(type(hdx_preview_resourceview))


None
<class 'NoneType'>
