In [None]:
# preliminary test to try and make a connection

import requests
import lxml.etree as etree


# original from https://releases.dataone.org/online/api-documentation-v2.0/apis/MN_APIs.html
# export NODE="https://demo2.test.dataone.org/knb/d1/mn"
# curl -k -v "$NODE/v1/monitor/ping"
#  -k proceed if connection cannot be verified
#  -v verbose

# try to go after https://search.dataone.org/view/10.24431/rw1k1b
# Seth Danielson, Kris Holderied, Molly McCammon, and Katrina Hoffman. 
# CTD profile time series data from the GAK1 project, 2012-2016, 
# Gulf Watch Alaska Environmental Drivers Component. Research Workspace. 10.24431/rw1k1b.
# download button says:
#   https://dataone.researchworkspace.com/mn/v2/packages/application%2Fbagit-097/ba358d8c-43ec-4919-a11a-5c7518ffd0c4
# https://doi.org/10.24431/rw1k1b

# Base URL of the node being explored; later cells append an API path to it.
url = 'https://dataone.researchworkspace.com/mn/v2/'
# First probe: the ping endpoint from the curl example above.
task = 'monitor/ping'

resp = requests.get(url + task)
if resp.status_code != requests.codes.ok:
    # Anything other than 200 counts as a failure.  It is only reported here;
    # the stricter alternative would be:
    # raise requests.HTTPError('GET /tasks/ {}'.format(resp.status_code))
    print('did not work')


In [None]:
# Next step: ask the node for more information.

# Node description.  The bare `resp` below is not the last expression of the
# cell, so under default notebook settings nothing is displayed for it.
task = 'node'
resp = requests.get(url + task)
resp

# Fetch the object whose identifier appears in the download link, then
# pretty-print the XML that comes back.
task = 'object/ba358d8c-43ec-4919-a11a-5c7518ffd0c4'
resp = requests.get(url + task)
resp.content  # also mid-cell, so not echoed

x = etree.fromstring(resp.content)
print(etree.tostring(x, encoding="unicode", pretty_print=True))


# ouch. that's the object and not very human readable.
# RDF = Resource Description Framework - intimate with URIs
# this one is probably Data Catalog Vocabulary (DCAT) which uses a whole bunch of
# preexisting vocabularies including Dublin Core (purl, dcterms) and Friend Of A Friend
# (foaf).  I think the x:y identifies the vocabulary and the term.

# task = 'object/10.24431%2Frw1k1b'



In [None]:
# Does this node offer views?
# The views request is left disabled; the node description is fetched and
# pretty-printed instead.
# task = 'views/title/ba358d8c-43ec-4919-a11a-5c7518ffd0c4'
task = 'node'

resp = requests.get(url + task)
x = etree.fromstring(resp.content)
print(etree.tostring(x, encoding="unicode", pretty_print=True))

# So the Axiom member node doesn't have all the API bits - like they don't have views


In [None]:
# but some of the gobbledygook returned in the object info does include URLs
# for the cn.dataone.org resolve endpoint (tried below)

# this gets actual data (tab delimited?)
# resp = requests.get('https://cn.dataone.org/cn/v1/resolve/e24107a7-8905-4f55-8c75-8b0831e47a83')
# resp.content

# Resolve the dataset DOI (10.24431/rw1k1b, slash percent-encoded) through
# cn.dataone.org; this returns info on the project, pretty-printed below.
resp = requests.get('https://cn.dataone.org/cn/v1/resolve/10.24431%2Frw1k1b')
x = etree.fromstring(resp.content)
print(etree.tostring(x, encoding="unicode", pretty_print=True))

# Schemas from the Resource site for ISO/TC 211 Geographic information/Geomatics
#  but they give a lot of 404, so not up-to-date
# This site lists them all 
# https://geo-ide.noaa.gov/wiki/index.php?title=ISO_Namespaces#Namespace_Descriptions
# It's ISO 19115 !

In [None]:
# Pretty-print whatever the views endpoint on cn.dataone.org returns.
resp = requests.get('https://cn.dataone.org/cn/v2/views/')
x = etree.fromstring(resp.content)
print(etree.tostring(x, encoding="unicode", pretty_print=True))

# Request the 'default' view of the dataset.
# NOTE(review): the single quotes around default appear to be part of the URL
# path that gets requested -- confirm that is intended.
resp = requests.get("https://cn.dataone.org/cn/v2/views/'default'/10.24431%2Frw1k1b")
# XML parsing is left disabled for this response; the raw body is shown as the
# cell's result instead.
# x = etree.fromstring(resp.content )
# print(etree.tostring(x, pretty_print=True, encoding = "unicode"))
resp.content


In [None]:
# Which datasets are available from the Axiom member node?
# Ask the object listing endpoint and pretty-print the XML it returns.
task = 'object/'
resp = requests.get(url + task)
resp.content  # mid-cell expression: not echoed under default notebook settings

x = etree.fromstring(resp.content)
print(etree.tostring(x, encoding="unicode", pretty_print=True))



In [None]:
# What queries can I do?
# The member node is asked first.  Oh, that's right: the Axiom member node
# doesn't have all of the API -- it doesn't have query -- so the same question
# is then put to cn.dataone.org.  Both responses are pretty-printed in turn.
task = 'query/'
for endpoint in (url + task, 'https://cn.dataone.org/cn/v2/query/'):
    resp = requests.get(endpoint)
    x = etree.fromstring(resp.content)
    print(etree.tostring(x, encoding="unicode", pretty_print=True))

# solr = Solr is a standalone enterprise search server with a REST-like API (Apache)
# logsolr = DataONE usage logging


In [None]:
# What is it actually doing?  Use logging to find out.
#
# This cell reuses `url` and `task` as left by the earlier cells, so those
# cells have to be run first.

import logging

# Enabling debugging at http.client level (requests->urllib3->http.client):
# you will see the REQUEST, including HEADERS and DATA, and the RESPONSE with
# HEADERS but without DATA.  The only thing missing is the response body,
# which is not logged.
try:  # for Python 3
    from http.client import HTTPConnection
except ImportError:  # Python 2 fallback
    from httplib import HTTPConnection
# Assigned on the class itself, so it is not limited to the single request below.
HTTPConnection.debuglevel = 1

logging.basicConfig()  # you need to initialize logging, otherwise you will not see anything from requests
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

# Repeat the most recent member-node request with debug output switched on;
# the response object is the cell's displayed result.
resp = requests.get(url+task)
resp