diff --git a/.travis.yml b/.travis.yml index 028875447..5f9fced1c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,7 +55,7 @@ jobs: # check formatting - stage: formatting python: "3.7" - script: black --check podpac + script: black --check --diff podpac # deploy docs to `podpac-docs` repository. This script only pushes the docs on pushes to develop and master. - stage: docs deploy python: "3.7" diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5469b59..c8bd8fddf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 2.0.0 + +# Breaking changes +* Renamed 'native_coordinates' to 'coordinates' ## 1.3.0 diff --git a/dist/aws/Dockerfile b/dist/aws/Dockerfile index 462bcd93b..b1ec82400 100644 --- a/dist/aws/Dockerfile +++ b/dist/aws/Dockerfile @@ -1,40 +1,60 @@ FROM amazonlinux:latest -ARG COMMIT_SHA="" -ARG TAG="" -RUN echo $COMMIT_SHA +ARG REF="master" -RUN yum update -y - -# Install apt dependencies -RUN yum install -y gcc gcc-c++ freetype-devel yum-utils findutils openssl-devel - -RUN yum -y groupinstall development +# development tools +RUN yum update -y && yum -y install \ + groupinstall \ + development \ + gcc \ + gcc-c++ \ + git \ + zip \ + freetype-devel \ + yum-utils \ + findutils \ + openssl-devel \ + && yum clean all # Mock current AWS Lambda docker image -# Find complete list of package https://gist.github.com/vincentsarago/acb33eb9f0502fcd38e0feadfe098eb7 -RUN yum install -y libjpeg-devel libpng-devel libcurl-devel ImageMagick-devel.x86_64 python3-devel.x86_64 which +# NOTE: this is still Py3.7, need to be careful about version management +RUN yum -y install \ + python3 \ + python3-pip \ + python3-devel \ + && yum clean all -ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH +# clone the podpac repository and checkout the requested tag +# for developers looking to create a custom deployment package or dependencies, +# comment this block and un-comment the next block +RUN git clone https://github.com/creare-com/podpac.git /podpac/ &&\ + pushd /podpac/ && \ + git fetch --all && \ + git checkout $REF && \ + popd -ADD . /podpac/ +# # uncomment this block to create a custom deployment package or dependencies archive +# # based on your local copy of the PODPAC repository +# # this command assumes you are building the Dockerfile using `build_lambda.sh` (which runs from the root of the PODPAC repository ) +# ADD . /podpac/ +# Install core, datatype and aws optional dependencies RUN mkdir /tmp/vendored/ && \ - cp /podpac/settings.json /tmp/vendored/settings.json && \ cd /podpac/ && rm -rf .git/ doc/ .github/ && \ - pip3 install -r dist/aws/aws_requirements.txt -t /tmp/vendored/ --upgrade + pip3 install . 
-t /tmp/vendored/ --upgrade && \ + pip3 install .[datatype] -t /tmp/vendored/ --upgrade && \ + pip3 install .[aws] -t /tmp/vendored/ --upgrade && \ + pip3 install .[algorithms] -t /tmp/vendored/ --upgrade +# need to add some __init__ files RUN cd /tmp/vendored/ && touch pydap/__init__.py && \ touch pydap/responses/__init__.py && \ touch pydap/handlers/__init__.py && \ touch pydap/parsers/__init__.py -RUN cp -r /podpac/ /tmp/vendored/ && \ - mv /tmp/vendored/podpac/dist/aws/handler.py /tmp/vendored/handler.py && \ - cp tmp/vendored/podpac/dist/aws/_mk_dist.py /tmp/vendored/_mk_dist.py && \ - rm -rf /tmp/vendored/podpac/dist/ && \ - cp -r /tmp/vendored/podpac/podpac/* /tmp/vendored/podpac/ && \ - rm -rf /tmp/vendored/podpac/podpac/* +# copy handler and _mk_dist: +RUN cp /podpac/dist/aws/handler.py /tmp/vendored/handler.py && \ + cp /podpac/dist/aws/_mk_dist.py /tmp/vendored/_mk_dist.py RUN cd /tmp/vendored && \ find * -maxdepth 0 -type f | grep ".zip" -v | grep -v ".pyc" | xargs zip -9 -rqy podpac_dist.zip diff --git a/dist/aws/README.md b/dist/aws/README.md index d91d6eaf6..5c74d6d42 100644 --- a/dist/aws/README.md +++ b/dist/aws/README.md @@ -17,19 +17,6 @@ The bucket itself is private, but each directory is made public individually. The following process is used to create new PODPAC distribution in the `podpac-dist` bucket when a new version of PODPAC is released. -- Run `build_lambda.sh`. Note this currently requires `settings.json` to copied to the root of the podpac directory. +- Run `build_lambda.sh` - Run `upload_lambda.sh` - Navigate to `podpac-dist` (or input bucket) and make the archives public - -## Handler - -...document handler.py... - -## Using Distribution - -...document podpac build process... - -## Debugging Lambda Function - -Use the script `print_logs.sh` to read cloud watch logs from your built lambda function. -This is currently the only way to debug your lambda function. diff --git a/dist/aws/aws_requirements.txt b/dist/aws/aws_requirements.txt deleted file mode 100644 index 95fd4ca7e..000000000 --- a/dist/aws/aws_requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -matplotlib>=2.1 -numpy>=1.14 -pint>=0.8 -scipy>=1.0 -traitlets>=4.3 -xarray>=0.10 -requests>=2.18 -beautifulsoup4>=4.6 -h5py>=2.9 -lxml>=4.2 -pydap>=3.2 -rasterio>=0.36 -pyproj>=2.4 -requests>=2.18 -numexpr>=2.6 -lazy-import>=0.2.2 -psutil -zarr>=2.3 -s3fs>=0.2 diff --git a/dist/aws/build_lambda.sh b/dist/aws/build_lambda.sh index a28030125..8fbf5ad8e 100644 --- a/dist/aws/build_lambda.sh +++ b/dist/aws/build_lambda.sh @@ -1,39 +1,35 @@ #!/bin/sh # -# Build podpac lambda distribution and dependencies -# -# Currently, this builds the function using the local -# podpac repository, including any outstanding changes. +# Build podpac lambda distribution and dependencies. +# Change $REF to specify a specific branch, tag, or commit in podpac to build from. 
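The pinned `aws_requirements.txt` removed above is superseded by the optional dependency groups that the Dockerfile now installs with `pip3 install .[datatype]`, `.[aws]`, and `.[algorithms]`. A minimal sketch of what the corresponding `extras_require` entries in `setup.py` could look like — the group names come from the Dockerfile, but the package lists here are assumptions loosely based on the deleted requirements file:

```python
# Illustrative sketch only: extras groups matching the Dockerfile's
# `pip3 install .[datatype] / .[aws] / .[algorithms]` calls. The exact
# package lists are assumptions based on the deleted aws_requirements.txt.
from setuptools import setup, find_packages

setup(
    name="podpac",
    packages=find_packages(),
    install_requires=["numpy>=1.14", "scipy>=1.0", "traitlets>=4.3", "xarray>=0.10", "requests>=2.18"],
    extras_require={
        "datatype": ["h5py>=2.9", "pydap>=3.2", "rasterio>=0.36", "lxml>=4.2", "beautifulsoup4>=4.6", "zarr>=2.3"],
        "aws": ["boto3", "s3fs>=0.2"],
        "algorithms": ["numexpr>=2.6"],
    },
)
```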
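The updated README still ends with a manual step: open the `podpac-dist` bucket and make the uploaded archives public. If you prefer to script that step as well, a small boto3 sketch is below; the bucket name and `dev` prefix mirror the defaults in `upload_lambda.sh` and should be treated as assumptions to adapt:

```python
# Sketch: upload the built archives and make them public, combining
# upload_lambda.sh with the manual "make public" step from the README.
# Bucket name and key prefix are assumptions; adjust for your account.
import boto3

BUCKET = "podpac-dist"
PREFIX = "dev"

s3 = boto3.client("s3")
for archive in ("podpac_dist.zip", "podpac_deps.zip"):
    key = "{}/{}".format(PREFIX, archive)
    s3.upload_file(archive, BUCKET, key)                          # upload from dist/aws
    s3.put_object_acl(Bucket=BUCKET, Key=key, ACL="public-read")  # make the archive public
    print("uploaded and published s3://{}/{}".format(BUCKET, key))
```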
# # Usage: # -# $ bash build_lambda.sh [s3-bucket] [function-name] +# $ bash build_lambda.sh # # Requires: # - Docker -# - `settings.json` to be copied to the root directory of the podpac repository -# This will not be required in the future -# -# Example usage: -# -# $ bash build_lambda.sh - # variables -COMMIT_SHA="$(git rev-parse HEAD)" -TAG="$(git describe --always)" +REF="master" +# REF="tags/1.1.0" # Change $REF to the branch, tag, or commit in podpac you want to use +# REF="develop" + DOCKER_NAME="podpac" -DOCKER_TAG=$TAG +DOCKER_TAG=$REF -echo "Creating docker image from podpac version ${TAG}" +echo "Creating docker image from podpac version ${REF}" echo "${DOCKER_NAME}:${DOCKER_TAG}" # Navigate to root, build docker, and extract zips pushd ../../ -docker build -f dist/aws/Dockerfile --no-cache --tag $DOCKER_NAME:$DOCKER_TAG --build-arg COMMIT_SHA="${COMMIT_SHA}" --build-arg TAG="${TAG}" . +docker build -f dist/aws/Dockerfile --no-cache --tag $DOCKER_NAME:$DOCKER_TAG --build-arg REF="${REF}" . docker run --name "${DOCKER_NAME}" -itd $DOCKER_NAME:$DOCKER_TAG docker cp "${DOCKER_NAME}":/tmp/vendored/podpac_dist.zip ./dist/aws docker cp "${DOCKER_NAME}":/tmp/vendored/podpac_deps.zip ./dist/aws docker stop "${DOCKER_NAME}" docker rm "${DOCKER_NAME}" popd + +echo "Built podpac deployment package: podpac_dist.zip" +echo "Built podpac dependencies: podpac_deps.zip" diff --git a/dist/aws/configure_lambda.sh b/dist/aws/configure_lambda.sh deleted file mode 100644 index 2b316298f..000000000 --- a/dist/aws/configure_lambda.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh -# -# Configure AWS for podpac lambda function -# -# Usage: -# -# $ bash configure_lambda.sh [s3-bucket] [function-name] -# -# Requires: -# - AWS CLI: https://docs.aws.amazon.com/cli/ -# - AWS credentials must be configured using the `aws` cli. 
-# See https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html#cli-quick-configuration -# - Dist and Dependencies uploaded using `upload_lambda.sh` -# - Function must be created from the AWS Dashboard -# - API Gateway must be created from the AWS Dashboard -# - Note down the `rest-api-id` and `resource-id` in parentheses in the top bar of the API gatway dashboard -# - You must Select the top level resource '/' and select "Create Method" -# Example usage: -# -# $ bash configure_lambda.sh podpac-s3 podpac_lambda h827as06ji 1ya7h6 -# - -# TODO: remove this in the future when function generation/update is automated elsewhere - -BUCKET=$1 -FUNCTION=$2 -API_ID=$3 -API_RESOURCE_ID=$4 -TAG="$(git describe --always)" - -if [ -z "$BUCKET" ] - then - echo "S3 bucket name required as first cli argument" - exit 1 - else - echo "Bucket: ${BUCKET}" -fi - -if [ -z "$FUNCTION" ] - then - echo "Function name required as second cli argument" - exit 1 - else - echo "Function: ${FUNCTION}" -fi - -if [ -z "$API_ID" ] - then - echo "Rest API ID required as third cli argument" - exit 1 - else - echo "REST API ID: ${API_ID}" -fi - -if [ -z "$API_RESOURCE_ID" ] - then - echo "API Resource ID required as fourth cli argument" - exit 1 - else - echo "API Resource ID: ${API_RESOURCE_ID}" -fi - -# Update lambda function to use the zips from S3 (uploaded above) -# aws lambda update-function-code --function-name $FUNCTION --s3-bucket $BUCKET --s3-key podpac/podpac_dist_$TAG.zip -# aws lambda update-function-configuration --function-name $FUNCTION --handler handler.handler --timeout 300 --memory-size 2048 -# aws apigateway update-rest-api --rest-api-id $API_ID --patch-operations "op=replace,path=/binaryMediaTypes/*~1*,value='*/*'" -RESOURCE=$(aws apigateway create-resource --rest-api-id $API_ID --parent-id $API_RESOURCE_ID --path-part 'lambda' --output text) -RESOURCE_ID=$(echo "$(echo $RESOURCE | cut -d " " -f1)") -aws apigateway put-method --rest-api-id $API_ID --resource-id $RESOURCE_ID --http-method ANY --authorization-type NONE - -echo "Log in to AWS and perform the following steps:" -echo "1. Navigate to your API in the API Gateway and select the resource /lambda HTTP Method (ANY)." -echo "2. Select Integration Request -> Lambda Function, Check Use Lambda Proxy Integration, Select your lambda function region and function name." -echo "3. Press the Actions dropdown and select Deploy API. Select [New Stage] and create a stage name (doesn't matter exact name)" -echo "4. Navigate to your lambda function console and confirm you see API Gateway as a trigger." 
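For reference, the deleted `configure_lambda.sh` above amounts to a handful of AWS API calls. A rough boto3 sketch of the same calls follows — the function name, bucket, and API Gateway IDs are the placeholder values from the script's example usage, and this is illustrative only; the new workflow builds these resources through the `podpac.managers.aws.Lambda` node instead:

```python
# Rough boto3 sketch of what the deleted configure_lambda.sh did (illustrative
# only; PODPAC now builds these resources itself via the Lambda node).
import boto3

FUNCTION = "podpac_lambda"            # placeholder from the script's example usage
BUCKET = "podpac-s3"                  # placeholder
DIST_KEY = "podpac/podpac_dist.zip"   # placeholder S3 key for the deployment package
API_ID = "h827as06ji"                 # placeholder rest-api-id
PARENT_ID = "1ya7h6"                  # placeholder root resource id

lambda_client = boto3.client("lambda")
lambda_client.update_function_code(FunctionName=FUNCTION, S3Bucket=BUCKET, S3Key=DIST_KEY)
lambda_client.update_function_configuration(
    FunctionName=FUNCTION, Handler="handler.handler", Timeout=300, MemorySize=2048
)

apigateway = boto3.client("apigateway")
resource = apigateway.create_resource(restApiId=API_ID, parentId=PARENT_ID, pathPart="lambda")
apigateway.put_method(
    restApiId=API_ID, resourceId=resource["id"], httpMethod="ANY", authorizationType="NONE"
)
```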
- -# LAMBDA_URI=`$(aws lambda) -# aws apigateway put-integration --rest-api-id $API_ID --resource-id $API_RESOURCE_ID --http-method ANY --type AWS --integration-http-method POST --uri diff --git a/dist/aws/example.json b/dist/aws/example.json deleted file mode 100644 index 90d6d269c..000000000 --- a/dist/aws/example.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "pipeline": { - "SinCoords": { - "node": "core.algorithm.algorithm.SinCoords" - } - }, - "output": { - "format": "png", - "vmin": -1.2, - "vmax": 1.2, - }, - "coordinates": { - "TIME": "2018-03-03", - "BBOX": "-180.0,-90.0,180.0,90.0", - "WIDTH": 256, - "HEIGHT": 256 - } -} diff --git a/dist/aws/handler.py b/dist/aws/handler.py index adaae46d0..5e5055ffe 100644 --- a/dist/aws/handler.py +++ b/dist/aws/handler.py @@ -44,6 +44,10 @@ def default_pipeline(pipeline=None): else: pipeline = defaults + # overwrite certain settings so that the function doesn't fail + pipeline["settings"]["ROOT_PATH"] = "/tmp" + pipeline["settings"]["LOG_FILE_PATH"] = "/tmp" + return pipeline @@ -82,7 +86,7 @@ def parse_event(trigger, event): """ if trigger == "eval": - print("Triggered by Invoke") + print ("Triggered by Invoke") # event is the pipeline, provide consistent pipeline defaults pipeline = default_pipeline(event) @@ -90,7 +94,7 @@ def parse_event(trigger, event): return pipeline elif trigger == "S3": - print("Triggered from S3") + print ("Triggered from S3") # get boto s3 client s3 = boto3.client("s3") @@ -129,7 +133,7 @@ def parse_event(trigger, event): return pipeline elif trigger == "APIGateway": - print("Triggered from API Gateway") + print ("Triggered from API Gateway") pipeline = default_pipeline() pipeline["url"] = event["queryStringParameters"] @@ -154,8 +158,8 @@ def parse_event(trigger, event): # If we get here, the api settings were loaded pipeline["settings"] = {**pipeline["settings"], **api_settings} except Exception as e: - print("Got an exception when attempting to load api settings: ", e) - print(pipeline) + print ("Got an exception when attempting to load api settings: ", e) + print (pipeline) # handle OUTPUT in query parameters elif param == "output": @@ -195,7 +199,7 @@ def handler(event, context): ret_pipeline : bool, optional Description """ - print(event) + print (event) # Add /tmp/ path to handle python path for dependencies sys.path.append("/tmp/") @@ -227,13 +231,22 @@ def handler(event, context): os.environ.get("PODPAC_VERSION", pipeline["settings"].get("PODPAC_VERSION")) ) # this should be equivalent to version.semver() - # Download dependencies from specific bucket/object - s3 = boto3.client("s3") - s3.download_file(bucket, dependencies, "/tmp/" + dependencies) - subprocess.call(["unzip", "/tmp/" + dependencies, "-d", "/tmp"]) - sys.path.append("/tmp/") - subprocess.call(["rm", "/tmp/" + dependencies]) - # ----- + # Check to see if this function is "hot", in which case the dependencies have already been downloaded and are + # available for use right away. + if os.path.exists("/tmp/scipy"): + print ( + "Scipy has been detected in the /tmp/ directory. Assuming this function is hot, dependencies will" + " not be downloaded." 
+ ) + else: + # Download dependencies from specific bucket/object + print ("Downloading and extracting dependencies") + s3 = boto3.client("s3") + s3.download_file(bucket, dependencies, "/tmp/" + dependencies) + subprocess.call(["unzip", "/tmp/" + dependencies, "-d", "/tmp"]) + sys.path.append("/tmp/") + subprocess.call(["rm", "/tmp/" + dependencies]) + # ----- # Load PODPAC @@ -285,7 +298,7 @@ def handler(event, context): try: json.dumps(body) except Exception as e: - print("Output body is not serializable, attempting to decode.") + print ("Output body is not serializable, attempting to decode.") body = body.decode() return { diff --git a/dist/aws/upload_lambda.sh b/dist/aws/upload_lambda.sh index 471a637af..5a399ec12 100644 --- a/dist/aws/upload_lambda.sh +++ b/dist/aws/upload_lambda.sh @@ -1,36 +1,28 @@ #!/bin/sh # # Upload podpac lambda distribution and dependencies -# -# Currently, this uploads the zip archives and updates -# the specific lambda function +# Change $BUCKET or $DIR to control the S3 Bucket and Bucket path +# where zip archives are uploaded. # # Usage: # -# $ bash upload_lambda.sh [s3-bucket] +# $ bash upload_lambda.sh # # Requires: # - AWS CLI: https://docs.aws.amazon.com/cli/ # - AWS credentials must be configured using the `aws` cli. # See https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html#cli-quick-configuration -# -# Example usage: -# -# $ bash upload_lambda.sh -BUCKET=podpac-dist -TAG="$(git describe --always)" -if [ ! -z "$1" ] - then - BUCKET=$1 -fi +BUCKET="podpac-dist" +DIR="dev" +# DIR="1.3.0" # for releases, upload to release path by semantic version -echo "Uploading podpac distribution to bucket: ${BUCKET}" +AWSPATH="s3://$BUCKET/$DIR" +echo "Uploading podpac distribution to S3 path: ${AWSPATH}" # Upload zips to S3 -aws s3 cp podpac_deps.zip s3://$BUCKET/$TAG/podpac_deps.zip -aws s3 cp podpac_dist.zip s3://$BUCKET/$TAG/podpac_dist.zip -# rm podpac_deps.zip podpac_dist.zip +aws s3 cp podpac_deps.zip $AWSPATH/podpac_deps.zip +aws s3 cp podpac_dist.zip $AWSPATH/podpac_dist.zip echo "Navigate to your bucket $BUCKET, select the zip archives you just uploaded and make them public" diff --git a/doc/notebooks/pipeline-from-JSON.ipynb b/doc/notebooks/pipeline-from-JSON.ipynb deleted file mode 100644 index a67488982..000000000 --- a/doc/notebooks/pipeline-from-JSON.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Populating the interactive namespace from numpy and matplotlib\n" - ] - } - ], - "source": [ - "# Set up interactive plotting using matplotlib, and load numpy\n", - "# %pylab ipympl\n", - "%pylab inline\n", - "import warnings\n", - "warnings.filterwarnings('ignore')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setup\n", - "## Import PODPAC dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import ipywidgets as widgets\n", - "import podpac\n", - "from podpac.datalib import smap" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Provide Earth Data Login Credentials\n", - "If you do not have an earth data login, or have not activated OpenDAP access, follow the [instructions here](https://creare-com.github.io/podpac-docs/user/earthdata.html)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - 
"metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Username: mpuecker\n", - "Password: ··················\n" - ] - } - ], - "source": [ - "import getpass\n", - "username = input(\"Username:\"); password = getpass.getpass('Password:')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 1: Set up and execute pipeline\n", - "## 1.0: Define the pipeline json" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "01a3eb168602430fb6597002e61d8e39", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Textarea(value='\\n{\\n \"nodes\": {\\n \"SMAP_SPL4SMAU\": {\\n \"node\": \"datalib.smap.SMAP\",\\n …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "value='''\n", - "{\n", - " \"nodes\": {\n", - " \"SMAP_SPL4SMAU\": {\n", - " \"node\": \"datalib.smap.SMAP\",\n", - " \"attrs\": {\n", - " \"base_url\": \"https://n5eil01u.ecs.nsidc.org/opendap/SMAP\",\n", - " \"product\": \"SPL4SMAU\",\n", - " \"version\": 4\n", - " },\n", - " \"interpolation\": \"nearest\"\n", - " }\n", - " },\n", - " \"output\": {\n", - " \"mode\": \"image\",\n", - " \"format\": \"png\",\n", - " \"vmin\": -1.2,\n", - " \"vmax\": 1.2,\n", - " \"node\": \"SMAP_SPL4SMAU\"\n", - " }\n", - "} \n", - "'''\n", - "pipeline_json = widgets.Textarea(description='Pipeline Definition', \n", - " disabled=False, \n", - " layout=widgets.Layout(width='50%', height='400px'),\n", - " placeholder='{}',\n", - " value=value\n", - " )\n", - "pipeline_json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1.1: Create the pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "sm = podpac.pipeline.Pipeline(json=pipeline_json.value)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1.2: Set username and password for example node" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "sm.node.username = username\n", - "sm.node.password = password" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1.3: Evaluate and plot the node for the world" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(-181.0, 181.0, -91.0, 91.0)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXYAAADuCAYAAAAtHCz/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzsnWeYHMW1sN/qnjy7OxulVc6AJJIAAwaMwQSTs8nJ4Bz48PU1trGNcbpg+9rG6ZpgEw0IBAZEDja2MTY5SCAQklCOu9rV7s5O7q7vx6npHUkbpU2S+n2efbanu7q6uqfn1KlTp85RWmt8fHx8fHYerKFugI+Pj49P/+ILdh8fH5+dDF+w+/j4+Oxk+ILdx8fHZyfDF+w+Pj4+Oxm+YPfx8fHZyfAFu4+Pj89Ohi/YfXx8fHYyfMHu4+Pjs5PhC3YfHx+fPqKU0n34e2qw2xcY7Av6+Pj47PhEUEzpVUnNu7UD3Jit8AW7j4+PT5+xgYqhbkSX+ILdx8fHp89YKGK9KjkUYRZ9we7j4+PTZywU8aFuRJf4gt3Hx8enzwRRjBjqRnSJL9h9fHx8+ojqgylmKPAFu4+Pj0+fUWCFelfUHdiWdIYv2H18fHz6igJl97KsL9h9fHx8dgzUMF7e6Qt2Hx8fn23BUkPdgi7xBbuPj49PX1FgBXtZNj2gLekUX7D7+Pj49BXFsI605Qt2Hx8fnz6i8G3sPj4+PjsXffGKGQJ8we7j4+OzLfiTpz4+Pj47EX2ZPB0CfMHu4+Pj01f8yVMfHx+fnQ9/8tTHx8dnJ0KhUL6N3cfHx2cnQiFJlIYpvmD38fHx2QaGsylmGDfNx8fHZ5iiQAV799djVUodp5RaqJRarJT6VifHf6WUesv8faCU2tRTnb7G7uPj49NXVP9o7EopG/g9cAywCnhVKTVXa72gWEZr/bWS8l8FZvVUr6+x+/j4+GwDylK9+uuBA4HFWusPtdY5YDZwajflzwPu7alSX7D7+Pj49JWiH3tv/qBWKfVayd/nSmoaA6ws+bzK7Nv6kkpNACYBf+upeb4pZhiilBoPLAASWmtnqNvj4+OzOX0MAtaotT6gm6q2RHdR9lzggd7IBF9jHwYopZYppY4uftZar9Balw1Xoa6UCiul/qSUWq6UalNKvamUOn6LMkcppd5XSqWUUs8bbaN47Gyl1L/Nsb93Uv8nlFJvKKValVIfbqHhdNaeieYaKXPNo0uOXaqUcpRSyZK/I7qp60hTV4tSalknxw9RSr1i7nueUuows//qkvozW1zzXVPmR0qp+UqpglLq2i3qPVEp9S+l1Cal1Dql1C1KqfJu2hlWSt1qntE6pdR/bXG8y+c/kHXtMpggYL3564FVwLiSz2OBNV2UPZdemGHAF+w+20YAGT5+HEgA3wPuV0pNBFBK1QJ/MfurgdeA+0rObwJuAK7fsmKlVBB4CLjJ1H0O8Eul1D7dtOde4E2gBvgO8IBSqq7k+H9MR1n8+3s3dbUDtwLf6KRt1cBc4OdAJfAz4FGlVJXW+n+K9QNf2OKaM00Vi4GrgMc7uW4C+DEwGpiO/MB/3k07rwWmAROAI4GrlFLHmXb29PwHsq5dBhVQvfrrgVeBaUqpSUqpECK85251LaV2B6qA//Smbb5gH2KUUncB4xEBkVRKXWU0UK2UCpgyf1dK/dhouUml1KNKqRql1N1Gy3q1KFRN+T2UUs8qpZqUuFGd3Z9t1lq3a62v1Vov01q7WuvHgKXA/qbIGcC7Wus5WusMIjj2UUrtYc5/Tmt9P51rJtVABXCXFl4F3gNmdNYWpdRuwH7A97XWaa31g8B84MxtvLdXtNZ3AR92cvgQYL25L0dr/Wegwdxvb+q+Q2v9JNDWybF7tNZPaa1TWutm4Bbg0G6quxj4kda6WWv9nil/qTnW7fMf4Lp2DYxXTG/+ukNrXQC+AjyNvOf3a63fVUr9UCl1SknR84DZWuuuzDSb4Qv2IUZrfRGwAjjZaHc/66LoucBFyMTKFKTnvg0RhO8B3wdQSsWBZ4F7gBHIC/F/SqmZndSJUur/zPC/s795vbkHpdRIYDfgXbNrJvB2yT22A0vM/m7RWq9HNPBPK6VspdRHEU3yX12cMhP4UGtdKizf3uJas5RSjUp8gL9X7DC3AcXWNlEF7LmN9XXH4XQ8T5RS31JKPWa2qxDN/u2S8qX33O3z78+6dml6P3naLVrrJ7TWu2mtp2itf2L2XaO1nltS5lqt9VY+7l3hT57uONymtV4CoJR6EpihtX7OfJ4D/MiUOwlYprW+zXx+Qyn1IHAWJYKiiNb6S8CXtrVRxnRyN3CH1vp9s7sM0WRLaQG6tBlvwb3AH4Ffm89f1Fqv7KJsmal7y2sVPQv+iQje5Ygwug8oANf1si2l/BsYrZQ6D3gAOB/pZGPbUFeXKKWOAS4BDiru01qXmq3KzP/S+y59vt0+//6sa5eln/zYB4ph3DSfLVhfsp3u5HPxBzoBOKhU8wYuAOr7u0FKKQu4C8ghw8kiScScUkoFnZggOqlzD0T4XgyEEGF8lVLqRHP83ZJJyY/1dC3jH7zUmIzmAz9EOrktJzxv7KltWuuNiI/xfyHP/zjgOWQCrF9QSh2MjLbO0lp/0EWxpPlfet+lz7cvz78/69plKHrFbK8pZqDwBfvwoFd2s16yEviH1rqy5K9Ma/3FzgorpW5Um3uMlP5tpeGXnKeAPwEjgTO11vmSw+8C+5SUjSOabZf1lbAnsFBr/bQRxguRycbjAbTWM0smJV8wdU7ewoNkn26upTHmlNIJT631F3rRNrTW/9Baf0RrXY2YxnYHXunNuT2hlJqFTJxdprX+azdtaAbWUvKM2fyee/38+7OuXQrVb5OnA4Iv2IcH64HJ/VTXY8BuSqmLlFJB8/cRpdT0zgprrb+whcdI6V93dtQ/IN4bJ2ut01scewjYUyl1plIqAlwDzCuaaoztPIKYAi2lVMSYdEC8W6YpcXlUSqkpiHnpbTrBaLVvAd839ZwO7A08aK51vJkDKI4Gvgc80tVNKaUs07agfFQR461QPD7LPNMK4H+BVVrrp7t5TqV1B03dFhAwddvm2J7AU8BXtdaP9qK6O4HvKqWqzH19FrjdHOv2+Q9wXbsMvsbu0xPXIT+sTUqp/96eiswk4rHIZOsaYB3wUyC83a00KPFj/jywL7CuRMO/wLShAfFK+QnQjNiKzy2p4iLEfPQH4GNm+xZz7hLgMuA3QCvwD0RI/6mbJp0LHGCudT1ixijahY8C5iml2oEnENe9/+mmrsNNe55AvJXSwDMlx68CGpGR0Sjg9G7q2pJbTH3nIW6ZaeRZAHwdqAP+1NmIyZiNniyp6/vIJOZy5Bn9XGv9FPT8/Puzrl2Wvq08Hfzm9dJ7xsfHx8fHEKocpUceflmvyq569H9e112vPB0QfK8YHx8fn21gOHvF+ILdx8fHp68owE+N5+Pj47PzoBRYw1h6DuOm+fj4+AxjfFPMwFJbW6snTpw41M3w8fHZAXj99dcbtdZ1PZfshm
G+8nSnEOwTJ07ktddeG+pm+Pj47AAopZb3Tz39UcvAsFMIdh8fH5/BRfmTpz4+Pj47Fb4pxsfHx2fnQgHbHPx5EBjGTfPx8fEZpvgau89gM+exi6gqSEytbLhAKC+JF7UCqyB2QeUqlCvb2tZefEmr0LEfBU7IBeCoM/7E32d/1ruGVtpLOaEVKNecohUhVwItWiqIa4I+au0SsCVsecFJYZmYX5YVJOM0bnUPSls4oYKUD0vlx5x663Y8FR+f/kUN49lTX7DvYNwx9wIALjnlbh598mLPlXZJOsceoQgAdW0R3IBI6ngmjLZk+4NQOxFbzrBsqAiIwC9PiZDV5j3Nx0WgurYm0hIioGO8ePeVBImTt9sH9gZNG5xgwbuHYLrnjMDbyu/+IvGs8lpTHZSfQ1uhI4d4S8EhYskza3Nk/7XnPED1DElm07Rgq7StW3Hzw+cBEDPP/sKT7+aeRy+kPNC5ynfy8Xduy610y62PnE/KkQ5yUjTEgvYsAMWosl87009j2md8jd1nR+CoMyWA4rNzP91lmSPOvWVA2/CPez43oPX7+PQLynd39OkFTz15CYGsjWt3RNs89uTbvGNF88goKwRonnnsUoJG3XK0Zo9glLwtGmW+Mott3jpXQzgd8LZjRvt00WRcl4hlsSiQYmIwzNOPXwqAtUVaz4JKEdD9mv2tRwL5MNmghHkvhB0C2f7X2n/xwDlEbLnXStvGNZFO47bladd1oQCrMvku69jRSTkuIcvi5w+cQ9RSvNqW9I7dccljQ9iy4Y/qp1dSKXUckgbSBv64RerCYpmzkUTiGnhba31+t3XuDGF7DzjgAL2jLFB65rFLaUEEcE0mxLqwDIkdrb2hecSyQMP6vJhEKgM2m4x5wFZQ+pXVhQIcd9wd3DX3AkIlfrUJY2YJpO3NhGJDWQYQwQ5wzkl/5qaHz2NiUMK1b3DyVAelfChvY+UtscEbjj5t4Ozcf33wcuycMRU5Nrl4ruO6p/f/dUsFe7HDK1IU7CDPaKD53uyzgA5zzw0XPMQND54DwJW9NJP8/iExK3359NmdHr9uztkAhJTCLs6PAO+0pwCYHIkQNO/Q2HCImClUNEUdd9wdfbmlLtu4LivvdbNTYHos4h3rqt39jVJqu8PoRkeO1hPP+Uyvyr7/2x91eT2TaOUD4BgkxeKrwHla6wUlZaYB9wOf0Fo3K6VGaK03dHdNX2MfZkSsvhnuuvqxFfc/99DmMaM/ddJdW5X9/Gn3ett3GRv+rsDXz/Ltyj7bjuqfBUoHAou11h8CKKVmI3l1F5SU+Szwe5PGkJ6EOviCfdAo2q0D2QAj8pJpLRcvUOeGaLA6NNOieaQrAijiwY7jjz55MSCauz0A5goA5ShPa3959tXe/oPO7S4RUe/5y+OSRKgmG6UQFo01E5NnEsgM3MTpUHP+7ScCELflHuuDoe6K9wqtOyaE065mWkzqTDouIWOemxANsSYr5iUL2KcsDsi7VxzptRQc8lresxZcRoa2X1T8xrSrPhzwtPYdmt7L9VqlVKlJ4Wat9c1mewySjavIKiRLVSm7ASilXkTMNdcWM1x1hS/YB4G/PXA5ZblaANrKGylEHMJtQdyAi+Uo6nQQK2+hjHwv2tnHGZdFsnDqp/4IwLOPXIbrul7dOatju8Nkovnrg5fLVlCTSkjFtlY897Bo8E5Qzvvkibdv1taLTrm723spCva3I0tZ+8RF3v7TTth6JNAbHn7iIuKIMCmEHZbYYhYgC+PDoQExwQwll9xxEgB7xGLsFY8zv73Dy+hH5z6wWdlSE8zPHjib91Iy5zA1EvHmUFKuS6BEwhQn9NbmcuS1Jmq0yrhteaa+a5+fzP8etQyAxeksU6Nihnu9LUXcmKASAZsWY/6rDQTYlHeY/eiFvNTaTrnpiCoCNo3GXHhkZRnz28XM15OnTbFTCSuL91JyzvRYhMvvPBmA4ht928W9Sf06RPTNj72xG9NPZ93DlvbxADANOAIYC7yglNpTa72pqwv6gn2I+Pj5N3uTlaWUTp72hlLXuGce69qbpb8oaulvGxe+geS447ffpjvcuefSx4e6CYPGFWeIDf07s88c4pb0D/3kFbMKGFfyeSySq3jLMi9prfPAUqXUQkTQv9pVpbusYB99rGiesXE2iYmanxxgRkMKfvyePJZrpnVowysKWW/CEeBzJXbpzrjhwXPIl8jow6jutFxRkCtHi9ZutACtOk52g66ngVsorIJoTHbe8oS5nbOwsbwFRUU9wMpbhByzKEkrnJCz2XGAx5+8mKCps0WJBtaZLb6UfMkM7vpcgZsePm8zW31fcIyf/TydJJNzeyg98Dz11CVkXWlTRVuIlnKZ4E60hVkWkRHFFz41BoBs7uf9cs1Y4ipvO9XyM2/76/ecAYjG7ZY8c8dsaw3tWp5ZTdD2tGGAcaGO/OXtjusJoisO+4CloihTHw6yKisjOgsYqcV0szCd9vz6l2ZyTCMKwGGViqqslHEDLpYxG77Y2srYiIwwxwRCKC2OAguyaW8ieoobY1Ugwx4lE6bFCf+81t7E8T7xOLZSXD/nbJoLjnffxXUX3zt7zlbPrzgxDGw2CqoNBr3td1v70T9RgQr0S32vAtOUUpOA1Uii8C09Xh5GEqDfrpSqRUwzH3bbvJ3BK2bi1Bp91DWHeJ//1MMQ7pI7TmLOlTMA+NofllDQ8qMA2C9QRjoswi2olGfjjaUqeTm8GoADM6NQRgK/H1/HmIC86BucPONS4hZoFWy0WY7phFwwgtrO2eRjUr+Vt7wVmwAocAMaK68IFqJko6mO8wE7u/nYz86XCHLwFiIB3uIeK69QuuMFLAp2O2d7A75kNE+4+KtXbDYQ7Epr/v1D51IT6NALWhyn14L9Vw+ew8J0ZrN9p9RU8m57hkyJmamzH/BA8ezcT/NIWzMAx1RVsNCYCPYui3LaGSMBuGX2al5uE6FhASfUJACY27iJcjtAoeS39Ivz/wLAucaOPvvSxznuluOwzXtwRGUlzzRJXbYqMNUIu5WZDFOj8g7NjEf4x6ZWAOpDYTbkO+Zi/vXCvgBcecIiPkzL/tW5LAHzPe5v7OZFm3lrwSFn2pdxNO3mOdcGA545xdWaESERhFUB2xPI1QGboHGZVW7JqFJ1vGdvZ1Jo8+LMCpZ572IwbZOPyjuXth0CChalpKOsDQVYatr+i+9O4cLvv+8921Eh+U2tzeW855qwAzQ70taDyuOkzD1syjvUmTmA5ZkcBdO8GfEIi821psbClJv7OfOkP2+3V0xs1Gg99dLerbmYf/0Pur2eUuoE4AbEfn6r1vonSqkfAq9precqWeL6C+A4wAF+orXu1oVol9XYQYT6cOLIs//I8/d17UJVXEBU1N63hWNPul3q+Mu21wHimjb70Qu9z9uqrRc54fg7OGG7avDxGWT6aeWp1voJ4Ikt9l1Tsq2B/zJ/vWKn0NjHTqnWp/3wcAD2K4uxLCNawMpslgnhjuHodRdNAmDMORES08wy8eUuI2eu5weTTVySiMM1b0pylV+Oz1KwzISQGyEWG
gFAOJRgjXoTgPL2EaTtjYBoMkWf8fmRVlaaYfHqXJZRxuMha4bNp8SrKc/U0xZZJ/Vn7M3ML6XakBtwO7RxrbDzJW9UySnFyVPlKpQxv3jnKYn3UuSvf7mcppA8p4itCDtm4VJAEzBL+FtDeRxzememmT88JHb2olZ4/sld+3t/5e7TNvs8JRLeTGu/8cJHNjt+4R0ncWmFPO8H2jdydFWFd+ysE7dtonZLrrj7NMaG5XtZlc2RsEXPKWjtTeBtyOd44FsyutMuFFrlgdw0e/VmppHPnCYeaFfeXUFTIc89n9kDACukcNJS7pRb3sEyGvXj/70nv7t1FQBBS/GlL40F4LLrP2DPuJg9Mq7LVZ8eD8AZv1nAgeVlADzbvInn/t/eAIRHW/zq5ysAiFqW987tFY/wUmu7p8kuSmWZGJF73adQzmt2GyCa/IEJGSG8156h0nyXUwsx3jPhIwJKMTYsmnxzweH2NXLu18ZXkjIvSMp1vVAMSsEYU95WypN/QcdGW5p2LeU+SGU9803EsvhPi1yvMZ/j0ITEG3qxpY16Y1Jal8tSHiiGfSgQMmYgWymqzHc3JRr2JmTjtkVTwYyOgd92vGP9orFPu7x3Gvu8n3SvsQ8Eu7TGDpBtcDl53CY+NJ4ZuZQmk5QfUCrRSN6Wn3isVdNakB+QdjVOwgwDg+sIJeXlLEQdssbMMrO1jBlBYwcNgWMCWf1+/VoOTVTwRiHJCmcBX7Y+CcAG5010oFRKixkFwMrbnpB3wi6FiGP2W9imjC4xoWil+SAgZpygUl4+gFsfOd9bfEQEz/0g5biY28fSkAl3uMI53fT7RS+NSZFwp8d//oAssPlGJ/7iPcUm2T0aZW6qCYCs6/L4xk2cWFPZ7Tm9peir/7FEOUszMlQfHepwNSze8qaCw5LmGrQZ2xdaNbfcL/NaBa29cjHL4g8P1XHF+eP4/WfgpN/NJ59/EoCT/zDWMyWkXTzB/tNbVuAYd8JnN27i8z+VZ5nWmiVpadOkaIhb/yzX+9LVe/LIxuIVJxAdL/V8+ZrFfP3bUwG49sfLPI+UpOuS15qZWRGQVRUBzyyhXMV+QXnHVQCW5+Tas0JxUgF5t5apNAlLXoqgUqTNnEN9Icy3Rsr3rdKKsFFG4hGHUUqe4YJcimXGxNLmuJ5g378ixrvJDHuFpSP5iBUiFZB3bWk653Ue+5ZFebJJHD7OG1njzRtkXZd9yqLe9/SvFulgGvN5mpSUWZBqJ2wE/iEV5VSZjmppJstX/3xqqXDfPvzojgNPfdVkfnfBw1vtP+K3F7AU+PtXxYUv68ps/P9dPZVCu7xEf/rcai69QX4YZ3/6PUaGgpy37wJSjsvbGZs94vISFyIuqxBNoDHvsE9BXs7y1loKusMWro0fdrYiT8Zo5+WZELEW+SHNiMU5JCs223isgTV5cW8NYNNcZoRM62hSwUZz3Y6AVFnbJVgyFe8GXHIlNvZwpvuvM+W4XiAol46RZLPRtPYtlx+NMjOrGt3jJGqRDbkC375Xnu915z241fHOvp++sj2aetHN085ZjLbFnl2ION4kaXHlaXFSeM8yKXNkpca5Q0ZVF1012iuf19qbdJwSDbOgPcMXbllERrs4Osjld05mj2iUb3xKJuXPvq2KsFJMj8l782RTM4clxEZ/aKKct5LyDh1UXsbLZll/e7vjdQpuquNervnNMn78jYkA/OKLk/ny7z8A4OovTUObGfvLf/MBFoqHaeTE8krebJMKDqyI83qmFVxw0Owei1BlevUPcxnyGTl/UrSjo/vXpnYOMlq90gonIO9QKG17Cktlcw2ZUAsAe1Pm2dVD7QEsM8L87vt5rto9TBYHC1jhZCEro1hbieY9JRpmfjLD6bXibPBsUytHmdFa2nGZu1HmQUaGQgQti7zrcniign+2yFzEWXU1npvmIw0yAjihtiMchta6/6Iy+rFihid/vGyVt332p9/ztovCbzApCvDRraO32gci1Hui1Of76E6OFxeudEXRdfKRJ2TRk+rhzf1NibAuCvVSOtPUe8tgTpyW8q1P3Q90LPzqC8WOHGCPaIdmefZtVdvfsG3kxPKOUc6BFXFv29nKVXrwKFV0syXPbEq085Ffd/zp4kf5lQm90BUDErlSgeqja/JgsksJ9l+cL3bMQIUm1+hy8c9G840rlnDF5xczMhTdrGzR0+DZplZv325R0eL+vmkTbeUidBOhFHuaH3GkJeQJ4HBriDItL2q6rN2LSz4yFOCW5FIAmpoKrIrJ/g35HJekx5AOir3eznV4E6wp5AiYIGDVQZuAGcIrVxF0bfKBjg6gt0Qs5Wn/SdORXW9cxqbHI12e1xuuKLGn/6YfNPW+UNp5feWMDseBYLu86tmK/GYeG4chWnMqnsNWiqeeuoRgewDLKHnXLZJnc9U0xf0/2sDTxn57289353++vwyAhaksa3JZokbrd4B3UqIhf/qOGm9lZ6lAqw0GedeUGRMKU2lsxJaCaeY9GxkKcs2dMi/0uxuWcuW1Ym+/5tIJXHKjeJAsS2e54zezAHmvz71G9hc01AeDnqfPwRVxXmoVDXZyNMSMvJhoVIvCckRjTyWSxELyTqzPFagx7o77lkdpzMk7Fgu6XhhlJ+ySjMj7m4w0eia/WCrolVGOIl0tI9EL4znCTsTzBBsXtLzvYk0uz8XXilJz0RULGRWWa+8ei7Aqm/PmQoocX53gtBPv4qaL5KKn/ek471jp4qqBRA1h59gTO7VgL5pgesv6nNjHR4YC3irAK7aY9OsMy0xUzo+0MsmWFzBkaVoSMjQNaIt8VH4Ae8QitBbMqs9YmFn6UADWFF4jb4R/whnLpthKmSTVMDoc8iZDXeVSMMbxd7MZbn9xuteOE4/vvp2lgu5XD57jrSAMW8oT7lrD881tZr/Fv+89k592YlrZklLzS2+e2UBQf8TVwGS+e0U3Lr5KzAlaaZYF04y1pPMtJiMpuo8WP393Iix3s6zNAhS8Tu+L31zElZ+eDMBVN37IXnER3qtzOfKuS7kR1AdWxHm3XWzYEcvyArjNjMe8EdGmQoFK42+ddFxvJegvF4a57RwxA33lb1MZd4p0DSftPp+bbpAJ3U98YR6nfvldAFoKBfKutPuIygrmJdNElUXUtvgglfXc/aKWRT4mgi+YsnGV2VbKW9E6oRClEDTRQpVmnJn8X5HPMjEozyAZznvKQdbVrDRRMCfE4J2kdIAfiZZhmYn5qU6MbLgANp5rpN1Le8apJ9zJnMcu4pAKMcv8duUmjrrxZP76BXFtfugymdMozp985YzZfKVXNW8Hyhfsw4Kc/nWXx64vWeDw6VPv8bY70zi3fGG6i1/eVxLO2I4Pw/e96ZbB1tK3pLQDK/LxCyQ0R3FB1ydPvJ1bH9k68mkxeNq2fKerc7meCw0yUXv4zfDpkhe7dJ3EZaduQ11bePX1FBKjv1BoX2MfztzUz0vjixbDtkSWuJmgfb7QwsfKZOgbyNgcUCHj/ANGfp7X1t8EwCvpdv65UbF/5XwAPhmsJNQuWpJK2VxlFhlnM9W0
GoX08hMWoo3bSttih7EnfQeAfU6TCdnHP/P0Zm0rrs779qfuJ2Zb3tC5reB2rABEc/NXpgFw6W8XErdsvnHPGfzcLLgZjtR95NsAXHv10l6Vb9R57nn0Qqa3j2F5+VrZqaBGd6xSLC5Me1m1egt1lmdy3iTroZVxym9ZBkBLQZN2NcdVizY5ry3jhb9dm80z0iz6KbMtzxU3btvehKmrNfuXyztR3RohF5SR4w8maC+mz28PW8X6kJg0YnaUu74n7W5wKz2tuSFX4NZ3dgOgfrd3WZvPMdlMFNcGA5tNiK7ISl3VcZuKvLSvwrIJpOS+G8NZRrTJuZlYntqkbE8K2Z5bbaItTLZCtPSIpdjNmDOzOOwflRGMeDZK+aLHWF8JKOmAL7mjjb3i8pzqI0O/Qnnz1YXDi53Cj7238difevISb7uoKRQF+/YusClS1AJDlvKiNC7lPf0/AAAgAElEQVRIZhhn/HVTrstuxoCrbZdgSvpWJ+h6Lo0ojXLVZsJlt5gMzyOWxYst4jXxnyaLDctmeNf+9UEyGWw5iotuF88b17jqrXnmf7huztnsZ34YzU6ByqLrIzDfDJ3bHYeDzSTb3I2bOMW4GDYWCmzKy9C8M414KBhx8Lf55n9/6AlREOFb5BdddEb/uvurAHxQtpGb3qsH4Ed7NlOXkQ5tdv51ao19eX9dzqKg2KbLbdsLK7G+JDqhg/ZsujPjUWqsgLeeIZyO8bv8h975RcH0r5ZW9jAeMo7WfDwunUIoGfA8TZSjPDNflDrabfGV/zCY8uLI1wYCPL9J3gcLWJkTgW2D54EzKx4jjcNDDWJvv6Cm1ssJUOkGKG+X9QLlsTFeLtpMrhnHlboioSo2pkwUWQ2ZKnnG4dYgtiOdxX8tj/PZGeIBtHt7uWdX/8Jt4/j+BXL/xQ6yNNZ9b9IABpXk2v3inxtYmulY+5AIBPjzNiQD6Y947PExo/T0L13Sc0Hg9e/+dNfyY1dKLQPakPmmgtb6AKVUNXAfMBFYBpxdjEM8EPSXQO83BtBu9+1P3c/TT1za6/KlQn24MeLgbw91E7ZiZjzac6FBZlZ8cDNfDSbbItT7D43ybezdcqTWujRN/beAv2qtr1dKfct8/mZvK3v8yYtZUZLK7IunD53gDhtN8qDyeMfkZ9hlo4n50ZgrsFfB+OiW5wmnAoQyMuRVWLRWi0fOvkSJbBLNSFuao1UVz+lmjq1TzJqwHIBmt0CzuV5rwaH1XRP/I61RAUX5aHmE6bXSRz74eMJrE8hKRQCrYJE12pyjNVFtU21W+43UIZa7Wf74sIxKPnNax1zEYFM0RcxLy8Rd2JKsQPOS8my/Uz6Kv9/7WY44T3K0vnrfdwFIso4NCdH6xhLm/6aIV8wL2dXsmxdTztHxCkJtHYvOigt7Dqss8xJ/j4g4rHXlPVuRyZEI2N7CouZgwVuI9EhqtRfLPKgU7WaS+tL6Omzj6aRtTbBFNPza6ExGTvwoAIsXPEBbufw08u46bzFaxFZkjAluEw6zzKKdD1JZL0bM000tjA073qgvFSxwUcKsnG6J4taIlj8iNYMmeyEADSF5plPzHyUcStDQ9ra0TzuUByUAYShYzhpX9pfbY2lVoqX/dFqWaErmh1ZXrmNTVt6h3356Bf/eZGLDBAO4QNC8dj+8dhJc9S2a5vecEFyuZ7N3PM689gFOqN5rfMHeF05F4g4D3AH8nT4I9u4YjDCwl5VMvBZ9odu049kobTcE1fKjqrICrAjIS1qnAsSy1aAgFW5icuIYatPiVTPliPNZ/Lws19/YtoDXIhuoI0B9KMjlf5Aogzd9eaW3vHtDrsD//lpWyX7t8nEUX8D771hPxpWOJJ4NEE6ZpetlaTZ6yT4cKEA80PmkW3kX+4eaK86Y7aWGK3LJgyPhwe/y68uWY1eINAm5FgnjDRTfmKDNNmsZLFhfLt4r5bbN+opisC9FnSs/k7zW5Mqkw3w3mfHcXJ2wZrRZNflBKsu6bMEzN1wdnUgsJGaxpLMGbRY4VbZPpklJrKIJZUeAcTdvaf2Q1UueB8QEUhvcE4BctoWMGbgW1FJajIvr6JYaVlaYxWxoXjQLda6Jz6CmfAZ2QEx4k+0gqXYx5TTY85kRkaiRbjBHNFsj95dLEgonIASrm/9FRUjcK6PhWpraRPhXV+/BuHaps2bKPiz9wGjNDmSMfladCrNciWfVJCfKsZb48auCIluexyrIs7nvxxuwCsqLfVSMhbQleS2dczHc76OXd5tjYnBQoIaxjX2oBbsGnlEyprnJZBUZqbVeC6C1XquUGtGXCk/shc1usCjaDx94/KIeSnaQCjcNVHP6RFe2z/seu7DT/YNNMUHIJ0v2ffn02Xy5tNCD3x3EFolQ9xk4fnJuz263g4uvsXfFoVrrNUZ4P6uUer+3JyqlPgd8DmD8+PED1b5+48zrRSv68bXLmZozmnKoQKtZ+DEqHWZp+VpqlXwl/0o9ykYTTnXPv77MlLJjASi0ZxhjFmskCDD95HkAJJ2EF4xr3/IoK8wk4tybN3pBwxpd1/PqSEcKJMOi3dlKUWlSrr/Zlu7yHq657ywOSchQf0Umx80Pn9djXPrB5KV7v8n6hJgT2h2Xu06TMA4rHJc6WzTqRdkM08IyemqraUYZc8h0okSbZH+6OsOEVlGhF8SbvJRyZZblhRTYKxLtyHTlhMlpx1uxXBcKeJOvdi5CokL83UdVHMryZaJtJgurqQiMAmBd4ysEzIR6eWwMlXUSQKx5wwKcfMdkYasrZo/RhX2ZFBEt2KnIEMvKorba8GIOD1dwY9NaoqFaVmX+w5RKWbgTTdTTnhSf+LroXmRSck4kVkPV6JkArF/2HwJGw5867gwCxssltWkNk0dIdiMrECJjYvg0rZ5PXbWMKJo2LSRvwl1nIwUmufKOWknF+6FieGKYkpX77GtCmeGHb2PvEq31GvN/g1LqISSx63ql1CijrY8COk3carT7m0G8YgarzdvCWSfexeev/hYggYziaREaa9R6z9OkzKnDNlEiX9At1IcDXqKO990US5ISvGiFynFe4WMABOwo34tLlMlYaiTaRM07815NrklOvvFbK4kHRKiNbqkjj5iBMpU5L5qk5Vi8kZYfX2tB4pN8454z2NvYbi865W5+8cA5ngljRyARsFheIeYANGxyRehM13H+kRQTV1XA9gSwC8Qq5fnVtURYXymd3qq2PB+Li6tqs5P3FrEBfDy3PwAbAguwUZ4tfXQwxG4hEaihmmrswNa5TGvL96QYQzLOKKKxojkkRapVOifHzRMvE6+ddGojY8o7cg5oV9rq6gLpnJhAJpR9nFRqA9+rnE4sNoLa2N5euXTLOqpGiAdVtLKejSvkvcllW9FNUiZgR9EmxvmGta9hG1fJdLaRaFJSOy4Jz+PQURLV0A5FaduwWO4hUs+40RLI4oOFsynGdHTI0mJJ/fuHynBtjeUojjn1Vq657ywAfnjO5ikBbzOeZaXrSYYbClBq4Fe3bitDJtiVUnHA0lq3me1jgR8Cc4FLgOvN/34Kxza0bHx
bJohu6CGuxQu6ZTCa0yd+8UBHm4sLeIYjB5/3U2/78W2I9bKtjCjMYG3wnV6VPeT8XwLw3tw/DGSThjWl4aN3aHyNvVNGAg+ZSGsB4B6t9VNKqVeB+5VSlwMrgE8NYRv7jWJG+o+Ul7EwLkPikcEgY/OiFa0vW8vU/AEsb3kWgLOix7Ei/goAH6SzpM0wPxGwaciJEClEHFaatG1L040c7xZjiaxjvC1Daisn2rpWkCdJtkJMND98P8RVe8gwf3ku7WmbI0MBLwtPEWvzjzsEtS1j+aeWCb8Z8QhrTejXFtthrB1kVTbPdJO1qCFfYDQhylpFK81am6jQ8jyOCCZoVPKMc66mKmB70TC3ZO/c7gD8x3mPUVnJtjWqVmK9OCY0bvNCWf5fN/EjJBvEC+eD1BOMbBVtOpGY6NVXP/5gcinp6Dc0/pXx1UcCkE43Up6YINvtDYwff4zUs+4BxsYkREVr2zI2Ni9g9CjR8uM141EmDG8hk6S8Ws4v5NJEKiT/QLSy3itTUb8b6U3ynm5Y/zrNWYkgmdAh3lss8y97TL6AYES8iiJldeQzJiplddLz4io+XwC2GLgkbJvxkRBzHpM5qN5GEh0e+CtPO0Vr/SGwTyf7NwJHDX6LBpbi/Pn+6Soaq4zN0VVebJJEa5h1wbc5Pz6CbKxAJtlEOG3SlEULjGvf2h/5pUySg6JiR24OOLxtyxD+jbZ2GiLyY1qfE4F2wwUP8Z3ZZzJTi3nlhLHtPNkkQv60WjENrS3Jl1nkolPu7jF63lByx9wLGONIJ7bWznqpCXPxAh+1O55Z8xa39sNzHuC5hy7DCbuMCAQ3y2f7uLuR4zNiwy5EHJ7fJCadg8plfuGKThZo7QO8PucHAOyVrkBH5Xt9e9lNLCiJuXtR4nsAuIUc5SMlXPT+XOEdb1o1j8p6sbEnG5dSM0nMPdm2PEsy0um3BQpUr5UVyi2JLK+tkvpP1FMIR+S7DEcqsQIhEmOks3EKOSLVIsDTDevIJcVOnmlvZFOjdIA19XthG7t644o3yOTEC6dFrSCfMOaajM3oCnHHjFbVk2uXjieSqOOtf/8CgBftdo4Il4gWM01wzKkd0Ue7YzibYDbD19h9esu2LrvuD7r6QQ1I2NNB5spe3MNHz5fE1H8xCaT7yv6f+j4A/7i768w6VbvLROWGN/69TdfYEelqBfDXtyOs8/DAF+y7PCeZVZxpK+NF0KvPzGR+8C0Apri1WPkg06IHAZCzWmnZJIuPRloh2irFHNBScAiYiHinO4fygfU6IItWijFDRpRkZp9hFh7d/PB5rMvlvMVGScelqiQ58I5K3LZIm9gq9bkwBZPoZLeYZFB9KzuXPe1jWfz0XUz9ZNdup62uw/P3S77ZEwLVOCb88ujCvnylDH6XfJ7d2+tpSzTx9OOXMiq3F+stMavkQw6BguXFUMlVu9S7ovFbacWZoy/b6nrJxmUEI7KmoGLUVJINsu4gFCr3tOZU+wbWvmxs8QnYaLIETUlO9PzTw5tavVyxVYlpninFCoSIVY8hUiNaupPP0bZSJjrzmSTZpEy4JkbuRnlBRm6FXBrL7TAzNYYWyTVawzgmG1iVM9EbFWxauYANDW9InU6KVKWMRKe7EdaTZWS+8/jq39ii4xzOcYi6pB/92JVSxwG/RiJB/FFrff0Wxy8Ffg6sNrt+p7X+Y3d1+oJ9kLEKFtVJGdLXTtqHI1xxF0sHN1A7ScJJZJrX0dDwFntN/wIAi965jzKzGrQxvo6wWfxyxZoX+eYEqWtK1OIfzWLjjNkWaePdsDBV8GK/HF9TyQMNMgR/9fn9AFhyx48G9oYHmPfaMxwUk/uzcxbjKz4OQKGQIRCIcNoJd7H46c5tt9qCeLN4vNSGakFBqxaXwmhKhG6kVrxVbjztERY9dQdlaRGIoUAZsVYpk4m0oW3NOzkTXz0cYon9AgBWHBYv7fD6GHOIuK02tyyiuV067sjKOLUV8h5UjZpB3tjVN7YtYH5cUsQt2pTl5IhkFaqt2ZM1618CoCw6hqpqCfwVjJQRjInN28mlybY20N4oHUbluBleh9G0ej7aJLhILv0XkbB0QrGKUaxfKfM6jpPF1mbVak3as5m3qhXkN3S4xFql+eG2mIvpapJ091hHvP+FqUynZYY/ul8Eu1LKBn4PHAOsAl5VSs3VWi/Youh9WuteRyL2BfsgceHJEk702Ue21t5KyTSvG4zm7BJMPErm3bvT1LfkyLNFEXrx7isHpE07Gx8558fedl/iEBXZIbX1Iv1jYz8QWGzmHFFKzUZW328p2PuEL9iHgD1miiteqmk1lvFxDgZjtKxaQLxGPFvqx3yUrJngmvXJb/He8xLed3yhhZAJT/C5cZqHmmWC66jKCqaZCJBL0lleapaXriyUYqYxxyxJZzmxWobRK/fudHnATsOqpn/y9EOPA7CbHZVk34hWX6tkcnIE02ljRa/rrKidTAWTPa22yJpght1SE/iY8UTKtrawpGw9INE4VaWYOrSlvbSD08IHMaJSJkY/cB9iXVKikyY/XEOblhG3jmhmZmREsVt5lAbjnUPLs7wSkgnd8xKHkm5vAMB1cmSSsh0IlaEsm0JORnHrFr5AKCR1RWLVLNvwDAD1ZQd42vuHyx5BWybhitLYrmjjwVTAC7ewR24fggGZlE5nG3l9zg+8uYUixUV2W3LdnLNJu66XtHpHR9Frjb1WKVUafvZmsw4HYAywsuTYKuCgTuo4Uyl1OPAB8DWt9cpOynj4gn2QOebUWz0f5ryTYuLepwBQyKYpZJJo16Fx1ZuMmHiQZy91MmnPQ8Etc7Ey8sOoy4U5rcakIHMVq42tdFZ5jIMTIsluXpPyYoDPKo9RYYbX3989yw8W9j3H5HDje2fP4YW7JZCAY+Vo2CgrcfNxh4kh6TTLmmvZfV/R2u1giIYPX/XO3+eMb3Ra76EX3NDlNfc+/eubfa59/I9U7TaDZOMyAOLV4xjTKgJ2ycYnuK5JhPzVlSOpcMR8QxksXisJSUaG96JdSUc7fuonWfy+mG5sHaE9KOf+J53kcFc65bdCrVSaVcYvpR5hP/sTAITj1TjmHbADIdpb1hAISKcejlSyZq1M2FZVTOOlgJh7Dkq96iVRj1fZxDeJ8M9GUkQzYtYp0OJl21LKwjXBz1rU5p3iJ0+4HYBnHrvU23fcLcdxaq2Yeuq8xWDDd9Kx9+i+aOyN3YTt7cyZeMuKHwXu1VpnlVJfQGJofaK7C/qCfZjRuOrNQbvWi1+/adCutTMw8oCPDXUTdgiOPen2bo8PpzAU24qi33KergLGlXweC6wpLWBcwIvcAvyUHvAF+xBTyMoQV9k2dihKzei9AZn8Kk525ZIt7LH/pQDMe/3XOFomnNaGMlQXv0JLM91EG1yZy1Ftkmh8dWS9F7qVtPhl70y8PucHpMy6gHhzuaf/hNtC7DNDvFzS4XU0LpORcCAQ8bRYgOV/67DxTviEeGu0LHqfbNvmwdhG7HcIXbGxZQFVo2aQGC35Z5tWzSMYFHPFtJGn8cca0aKXbHyCILJ/wv
6nkntJzCm5fBv1I0ShC8YT7LHvpYCEDSj6m+8VKfMWOR04fnfaVomHy53Lfsf+ZUabtmxamySpRVnFWOKJ0Z5pZsXq58iF5Hzd4rJ/XNY/WEnLCyW8MJtlVo0RVhoKEbl2g5NnVVK09Hm8TJmZvI8Hh2ekz0FBgbL6xSvmVWCaUmoS4vVyLrBZzsZiiBXz8RTgvR6btytlUBqONC0Q04EqicMSCEXRrkOoUrwgsk0NrH3/HwA0ty0mExCzzOTaE/jVWvH4uCx2GBe83ObV8YOPSUjYXy0K8IMxHXV/4lPdekntcKx96a8EIiKklGV75qtcssmbv7BDUQpmVaR2HW+FpBUIETTnatfx4qoESoSoHYp6ddrhKE62wyOkWGdLw2JGTP0o2nE221+sa9G8jgVNjiudcm3lnsQqJAhYy8ZFjJ1xrHcPxXa0rF7orQrVruN5vESq6whXybvRvmoFr8y7DoDnsy1syIsAPj88gt3Gnu3FncllW1iflzjqylVMMUG9Ghvm8Z+AuG1OioTJG3mQ15p3knKvh8TKSdvSpjkNTYw0ia0rAzal4uOac+Zs9f0MR/ojg1L5+Bo96+sn9qrsC1fe1e31lFInADcg7o63aq1/opT6IfCa1nquUuo6RKAXgCbgi1rrbgMm+hr7EFM9Y29vu3nhu15Eve3ljBNF4P/qhv5LtO3j49NBf0V31Fo/ATyxxb5rSra/DfQpZZgv2IcJDW+9DEC7WagSraxn8Rui6U2YfhI14/YFYGz1caQaV3ecuJYe0daOPyorZelzs70YJXYg5Gm44fJqNi6VBVsV9btjGc0y397iafXpTeuI14rnUSGT9MxdbiHnbedTHeVLNXk3n/NGAcq2iVSKNh2rGUNq42oKRsuPVNSRMZOnLRsWMXXPjnBHaxfLyCud2UgmKyMv2wriFhcGuY7Xuceqx1A9Q773TGMDrStk6X8u2UTRmSdaVc9e46Tz3i3ZxL/bRGt+KtmMWj6Hv7jSjs/Fj+BhE973XDXZKzc6W8ZJNZLkfFHLY7yZl1Ffq+PyiQqZSH03k/Ima/eJx1lnwlS81CoRMA8qr+jyu9q5Gb6/K1+wDyMC0TIC0TLaG1YQqapjyr4iEKwS4ZVPJ1ny/kPeOTdc0LE9//St63zhytu8DDU7A+te+TuxyjGe4HVyacLlYpbQrkP1BBGEhRKTiVvoEMhlIydhGdNKKJ7whLFbyHkdgVUSZjcUT5BPi2klEI7i5Du8Tmwzp2FHo5RHorStFrt3qmk1m5plu7x8jPfdRSrrmLjf6Sx7o+M7A4jFR9C2VspXT97XM8uFy6tJrTGuj45DOCEdSSAU9Uwxf//XlXxkmnj22KEoh5iYeQfHcixKPslxIekAV+u3ONlkMvpQrWd8i1mYVbEXIWPi2T1wBi+vk+idK7MZNjgyFzE9FmGp8azaJxJjoolDdHBFnNvXbdhhTDD9jR+P3adH6vY9yIv8N/aw42lfvarf6u4q5ZjP0LDnaZsvflr63NZBxXYUHv/M00PdhCFCQ+/92AcdX7APM0JlCU+oF4fn7/7nd15o1o0b3iEdbB6y9g01diiKHYpuNrnplsQ3KU5gKsv2NPNwos7bdl3HmwzNp5O4xu87ECkjaCJl2oGQ5w9uBULe/lJNXrsOTrpjVKBsm3idmHjiiBkFzKjAjB7y6SSWZdOy6H1c1yHbImaSaEW9F553S4qTtbn2FiJGY7ejUVa/JpmYZo39ome+qxg11Ru9JNcvZVTb3tzX8i8A5rUnmRoVj5xjKysomGxH6zNvUl8hbpxvL/8F5cbkYinFgnaZ6D2sMu7lug2kbWqw2Rj10wD2k1fMgOAL9mFE1e4zO9XUp804hzdX3gjAflO/TNP7C3td5z//LPFmDr/wRhbM/T3/dl/0jn3mtB0kPCqw8R3x7w/FE7iu4wnkSKLOM5Uo28Y25pSiyaSIFxyLDjONHQxRNlqEcbph3WadQmnWIzssJhftOARiZd7+Ynk3n4M82BEp17x0PpXjZ3jlij8yZdlYZtGUFQoRTogQdnPS1rLxE7a675ZF4vyQalpNvF7aum7e895xJ5f2wu6Wj5xEy2rZbmlazJLQGwSz4v85KhzmpIBcryY7nTXl4o3VmC/w/PwfAhBUyvOKubRiBMmw2NJd4EDbRNKOw6r2f+9gsdMHCt8U49NL4mPGdmwj28WJVZ9dj8Q0CX+waeV2hQ7pV3Y2l9ltQSnfxu6znSQbl3qrl+e/d6OXHBjgyJJyf5sjC3KKhT9xVte29VPGf4UNb/y724U3wxErECLf1uT5dOfTSW+yUTsOGM08EO7wP1cl5hdl2Z427+Rz5FrFBFLIdoSsDYSinvatbNvTqIvadnHb229SA8XHjKV99SoCoag3igiVJbDtDo2/2NbW5Qu9UQdA+eipvX4GhULGS8ZRNnoSrY2yKCnb1uRNhKpmi6n5A6D8Nd42CcqLSV1CdpmXa7cqYPN6m3jCVAWDNJqcrq+Q5JPVCe+au/ePF+5OhG9j9+kH7KwIhOayLKdUiXubW8h5SR3qyvchYVYmr4kv8c47/MIbve0Zp3yZGUhcle4SPZQmivj4BTd3WW4wKXqsuIUcoXhis5goxVWidskaAAmAJQLNsmxPUDuZtGeuyaeTngCOVNVRSCW9axSFrrJsAvEO80tRMDvptLddFP5tSz+kfNJkcskWz6ZfNNeAdAbpRoneGasZw8r5T3rHRh3cfdKw4qpYgEy2mZVLJJtSdFUt1fWSvKO1Qb738pqJXtlTT7iTU832cw9LZFHHzWLbYqJpzDtcOWI0INmR3q/oUBoybofg2u2EzqOSvv9oR1iKPU7+fLf3sLPha+w+28Wko89l6Zzn+rXOHU1T9+k9Iw/4WKdxbY4+TVLTvfvIb3pVz86QOWvg6J947AOFH1JgB2b1v5/xti3LZu66DtvnVLcj3+fOkBU+tU5WYlnBEE4mTdBo0U4+R7qhI4Z9UYsORss205aL+3PJFs+v3AqGPM26kE0TiovpodTMomy7Y1K1JOyDFQqR2iA+5rERY8g0NRAId4wYirlAw+XVBMo6Qh60r+2IiFiz56xtehb/uecbVCckl6lSFqFwuXesGIO+J+577EIAqgiw1kRrLI38EjfxYIormDvj+fvE9HfkOTuWzb0/QgpUTKjUB159eK/K/vULj2739fqKr7HvRNgmAtYkN+pN2M9JNzLnz6dy44WPDGHLtp98mwjKQKyMQipJLimfnWyaD94R7554ZBSjJ8uPLVJd5wn8UFnCE86lwreQTaOK8fCjZZ4NPRgvI4+JLeN0eOBkNjYQHyGeKcq22bRWYjEtX/g4k2aeTsa4LwLeOXY4Ch2hY/qN95SsYJ1pHU04Xr3N9bycamdUSMJA75efzlxX4smMDPUyZrqC5+//DMqRd++I827Z5rbsaPRTdMcBwRfsOzDFNGseD3do5hGToGFxciM9sfKfj7F4ncQGL2YQGm4UY6iP2vNINi6VPJttSdGY45FRJNOryeVbPP/21575AS6iiQZVGfW1ojBZdpBCQfyzw9EqK
kbJpKUdjrLqLfENH3/gKZ6QTzesIzZSfNKD5QlWvSF28Xw+6cXILyaqCETKeGHZ/7KHls4lUbf1hOi2aumlfPT8n/PMYzLP0tS6kKZNCxk/sXsb/ZbUt8qIrp4Yi0LS82woW4xj3OmrAnZXp3okLHHPbNHLyVTleii9E+Lb2H0Gg1K/9G1N7fb8/Z8ZtsK9lKJQB0imV3dTcvB4Ydn/bva5/sAjBu3aPU2+dkfxvXnqyUu26fwjz/4jTz21befuuGhfY/cZOnLZrgM03TH3AgDG5SLsNvosAJYvf5rX7pdUZ/WjD2bxKolXfsS5w2OI/eKb36U6NYFWSxZyRXIJ2itEzcylk8xbL7FOCtWut9imtjnGspa/AbAw0M70jNi8I1R7WYxChTLCQbGxNy16i1iNWTlaliDbJCaW1rWLqaidDMD8tXfixkRTX+fkyb55KzNqRLg2Nr3D3XoR3P0Yl4cPJJuX9lXExYwz45Qv99vzKIYB7i+mmvSKWa0Jq86S+3TOm21pko7DC7PPBOAn5z7Yr+0adiiGtcbuT57u5Bz+64vZtG6y93nvPeQ5TQiHWZBKAXByTRXNxq95ZlmEcIvYV/ea/gUvSuHjjXcC8Pkhzn7zzGOXUp0SE8CDzjtMi4WZUDBRGWMFksZFryob8pKKtGuHRFZMK2OCB5I2UQ6bWUIhLOWDKZu9pssq3VWLnqPgiElnQ/kyYps6/NcdU75GT83okRUAACAASURBVKM9Izb8F8Nr+FuzRDo8Z0Q1VUGboBGK8ZYwFfZ47/xIuLpfBfu/7/kvkhUdSUF6yl60JeffLjHF77lU8sMWO3uAS065u9f1fG/2WZt9/tG5D/SpHYNJf0yeJiYm9GHfPbhXZZ/47DODPnm6C6dA2TX45/+7c7vrKAr14UBRcD3ovDO0DemEc0b0PInZn0Id4JDzf9mv9V1yyt3en0/XKCSeTm/+hgLfFLOL8s+1CSxbJtBmOxv57/qRAMQ3JrhLLwWgsOz3BFMB3iyIZn9GZTXPPPZpjj3ptqFpdAkVZnJvghPFKsiPZ3z+QN61XgK2TgH4gZJ7aOAFlrgSwOojsTjKlXPLqGfZ+48CEI/Uo5TUH2sJYTkmgJjtYOdEF9oYXISTMG6QGUW+EGNKW4W5dp6ASTheW7EXefP8hitn1FXzwOOS7PusbtwbAX7/0Lnk3Y5R/pW7sK/7cNaKfcG+CzDvumu32nfNfWdRbYRj2EqwzBFbbSKRZ1ZBBL4TdHEr8pxOFeGWIC1Wlocbm7n3zpM5PFHOAw0dHjeDGb71gLN/wLNzzu5V2U+ecDsAf3joPG/fsVp8wEO5jiXz6+MLGJmTwF2rnVcZHzkMgNZ0iKAtNvkMTeTisuT+r8lWjrE7zr96RBV58th5i7GxQ1mflYQf2ZzY1y2rl+6D20BfzS8AP7lf/N1nxmJeyAGAokHlpXu/CcDB50ne5LIRVwHwmV9Jpzgx0mGeguFtehkIihr7cKVXgl0p9VOt9Td72tefKKWOA36N5AH8o9b6+oG6lk/3FG3uw4lvf+r+TvcXQyZsyRdP75gbePW+7/ZrW2YF4uCAne9ah9vnjG/06zWHml1ZUy/Ss0Po0NFbjf0YYEshfnwn+/oFJePg35vrrgJeVUrN1VoPnxB3Oyg3Pyya68hQwEtEnHVdFmdEEzusooypjmjsb7UnSbkuh1km4JbWnqfJq23tOHr4aizd8UpIshXNzJQzOvFRAKraxpByNgAwdeTx5LISGCsWGkEyL+6USivubW4EYHIkQnmmHoBouIZwJEFKy/m5XCuLyoox85uZld9zUO5rW1ickZHa2FCYSdEQtz5yPpedunk450/feTKfMp6ci9K7oL96J/Snxt5bJVYpdRYwB/iI1rpbb5FuBbtS6ovAl4DJSql5JYfKgRc7P6tfOBBYrLX+0LRjNnAq4Av27eDom07h3JFiVliWzvK2SaRwVl2C1VkR7Ck3hlnASspx2a88SoocL7WkmOlGGBMSd7imQh5Hy+ujtcXRN53Cc5+fO8h3tG0UO7S4NZI1Lf8BIFuRZ2xhPwAWtT9Bg5LFTSOtMLYlmvi74TaOCogd/a/NrbTH1gPw65a3OaOuGqLw0aqLeKPlz9QRoMFESpx11tWDdm+95TtnSzq7K+4+jUkR+U5bCg4JY547+Lyf8p3ZZ/Lo7DM5pirBHGN2Gx0Oe3X84x4JFvfx84dHoLjBRCkI9INg760Sq5QqB64AehXDuyeN/R7gSeA64Fsl+9u01k2dn9IvjAFWlnxeBRw0gNfz6YaXWob35N9w4aNVF232eUvNd0dgR2zz0NBvHi+9VWJ/BPwM+O/eVNqtYNdatwAtwHnmoiOACFCmlCrTWq/o7vztoLMntpnDvVLqc8DnAMaPH99JcZ8t+XZtHf/IymRe1Lb4WCIOwAMbWgmaNF9vJFM8ZxbUrNw0gr8F0uxTJd4fGwsFDigXM82GfJ73GuV81wlxydRN3PTwebzbLhNxv7ng4cG7sT4yw5V2P6MWsn+V3E8oGaAxJy6UVYHxRPOit4QDCaKRWgD+0fwMM2JSfkI47MVHOTRRzm65fVm4Wuz+cUK0l+04Jous8XL58ZcnMiN66VbHG/MFDquQkcrfWzpS+P02I7rXx83ns2493js2Iy7P+M6H9uCoEyT+zJ8ufpSbjClwqNdDbC+KPtnYa5VSpaaTm7XWxWFOj0qsUmoWME5r/ZhSavsFe0nFJwO/BEYDG4AJwHvAzN6cvw2sAhNcXBgLrCktYB7MzSALlAaoHTsVddYMXmmVON5ltmKCiVFeHYLGjGwvz2TIOPJaxONrCVqwJAMtyXpedUJg7OozRqxlSrXEGFmThXtWVAJgB2w+Ny7K7EcvZEU2xz6xjiiTRQ+V4cKeZVFeMaORqbEwwYjc240r5zE2JqaY/4p+nHVarJAfrSjn5Ta554PKy3gbscM35Rzmxd6AAIw2z/Hk44eP7393/OaCh7neeBhd83/LeDuF950VV4/+5i/neuU/UdnhCfSV6FjeDLXyqwfP6dW1LrnjJFalQpw9KsxND5/Hgw0tXFxf5R2/8OQdy3e+Dxp7YzcLlLpVYpVSFvAr4NK+tK23k6c/Bg4GntNaz1JKHYnR4geIV4FpSqlJwGrgXOD8Abze/2/v3sOjLO+Ej39/M5nJgSSEkGAgB4kYsCgKilVrVbalSmkFj/VYbW219tLt9rXbbbvua7vafXfX1t12d31b7V6uurVSdVXQ4qGi1EPlFVFURL0IyiGBAOGQc+Z4v3/czwxDyGRmwpwy+X2uay4mM888zy/Dk3vuuZ/fff/UCLp663IdQlpFKkstX3lN+vc9BsvG/dDJMLrzMdvAD/0A/s5Fy6L3R2rEH7vumcMeuz1m82sf+PIRRJl/0pTHnqgTWwGcAKwW+0FSB6wQkSUjXUBNtmEPGGP2iohLRFzGmJdE5J9Tiz95xpigiNwMPIf9xnOfMeb9TB1vvJja/Flu8djhhue6u6IXf66YMpntg3booHVA2B+0vdXNexs4o24f74TssgL1JS6+5p4KgHEd
xS8H7HotLWVevtvwOQDKKqfStn01AF86/Vu8/r4tlFzc5eX5p74ejeXc83M/yQkg7HSOpvUeXM73zppjWR5+D4AHB1/naq/tbM0pauUTj73I7HUJJ/XXAjA4oY+3fX0Ugh2+AN996EIAfnHVE0m/bp6/ktUPX5/Usr0PfhxZP76C+51MpHmVLj716N8D8IO9tnD5Czfm73CeCHhcaWnaR+zEOsPhNQePK6uBvz6irJgYB0SkHHgZeEhEdgPBlMJPkTFmJbAyk8cYj4zLNmTv9vpZPNmOg5bgojdkx9jbfIPMKLVfxWfUd/Fqly/62s5AgOAEO97uKwtyVMhOUmkpLeG1gK3wNLfjBMpKpgCwZ9MamoxNJ9xS/WeO6puV6V8vJUsXP0iNswrmhJI69rIJgHAwwEVFtjFfXfQ2H3ntdaxq8VDhFNvY4QswY7JdH+bnW7r4Ue3kbIefdn9zySPRRj3Wnx66gWCp/X8/UcqjhVsWXPEbFjjbrH74+oT7f+Dapzn7l1+jebLtkO722XMxHPbw+UkV7OFjanuPGWkXeUNIT489XidWRG4H3jTGjCrVLNmGfSkwCPwv4CpgInD7aA6ocqd27mmw41e5DkPlsVR66bGOpMDGCzc+GV0y+JRLf0x6i0BmTrry2IfrxBpjbouz7YJk9plUw26Mif2e+UAyr1H5qW7QXu++pm491R7b+1y2Zy+LnIr0TSWTebXLXhQsd3v5zMRS1vXaC4YDgRL+ea9d0fD64mourrC91HX+Xpr7be+/tnEerdvtUr9ul4fyigYAGntOocP1TjZ+xZRMKLHXDyZOnM7+vXbiUigcIOCzQwTnlS/gKd9LANy/v5Nbptrtd5sAnYEgNZ4irppaxqA3kIPocyu2NF5kNu+pl/00pX3c0lRj13IXmNJzLG89egcAJ1/6v9MbbJqlq8eeKYkmKPXAsKvJC2CMMfEX+1Z57cySL7HWbzsJfzvlEh7pfhqALT4fVW57WpzhqaDd7aO5tJhZfQ3cE9hI2MmKqekrjlbQmVveiuy3jwd9vZSKM/bs309lkf1q7fGU0+g5k+2DmZzXlrpJk2YC8GLPY8yRFgB62YE4yQq9A+0MOn8C59dUES6y95s6K3ggbGek/nDKJVTVHce7m+9l1f98A4DPXzz268xGnHPVkU1Aim30X/6r+6OPP7fyaxA+ODw4tgiuYRNa8kOiPPaKkZ5XY09kzZKPn09+Isqsvoa0Hb8QCmsrK7aIdTI99bW//7tht1u0aOwNAgjgHuuLgKnCUzPjVKatsb3nHbzGmW6bh95cNUCVM61c+oV6KeYJ7BDFN90zGCizFww3Sz9fqLTDOr6Obqon2guj2ztW86k5Nvtl88bH+Xj7wWs/UyclV5ggm/p6dwJQ7/ZGl9ctL5qGS+yfRpfZyme8tn8zKXA0PfttJpBxhekL2gvOH5pVVHz4KjNqFw17jEjZOE+f3Wch9eZjdbyxOnq/c8d6ANyu4ujKlqdc+uNDtvf2jO3mJx1LCmTK2H5n1RGZfZIdI+3r3MbW9ucB6A/1RRf6OjrgRXzCYiZT4WnE5fLgGnD+SL2T2L3HFpXeP2k37/vssMSppfN4a9t/AHBs9bl09xycnDxz8XXZ+cWS1P7n59nTY8f9J3ubqaiy6cT7u1vxFttRxopAPX8OfwTACWznU812+kbQP8BnO38LQFm/h2BxCN9g19BDsPXFxynuPrg6Ztg9FocdkrOvw2YkFxWVUjNtLgBde1qpnmQ/9Le+aK+9HP25iwAIecNsYoAPnSWVY1fgzHf5Psaez7GpDKqc0ZL0thWexsQbqaQUam99PHJJcrdc0B67AogOnzy54f9w7kTbWx2s8uMLG97ot5OaTiouZZvbTmSq9nTwUb/NcZ9tSlhYYfOfP/L9AU+/HcrZ1vMSlUX5u45PcXk1JW4798MlHopL7HBUQ9UCDuz5ELBL8n7O2CGkoBnkv9vuAeCz/gZOEDtE01B9NgBb+l+i0fsZADY8+QsA2r3r2Vua0SkfeeHFR7/JrHo7c7W4vBpfr11rZ+px59D2vv02uK/nQ2Z+6gpefMxeYPb4izgJO+Gte9Jenn3mWhZ9MTPj7Yt+Y4fJnr3+2bTsz/bYdShG5aHJJ8yz/zKPtlftdPCvFn+aMrETjEJ+HxMqp1EWtDMAb2+FEmeM/XuNk1labJd/6uvZyX/32mnpJ1eU0lxmG7qX/H8ENnJqYHbWfqdUlXjtWiUTJtQRCtpljMvK6qmqPQ6AkP9gdaHe7jbm4aR1lp/EZl4EoNW8wLGykIUX3hfdNtKwV/c0sKV4c+Z/kRzo3ryJT96x50ZjxVm4XPYD3de7D2+ZTZ8NBf3UHXsWAHWcxSfvP8GcFrvc7+YPHqduip0IFty5ih0Te1n21NUAXH7+bxMeP5JuCYdeyM0KsbNP85U27Crj8jUnuebE+bS3vpSWfR173qFL9p5wgZ3Rmu5qTWp00tVTj5XP49jasCsASqvsxJum6np2bloNwJ7+95hR1cjRRbbH9fcNr7HO6bGWdHl5a8KfATg23MAZJXZY4kA4wEcu+/qS9KylkRXtna9wTOMSAMJBPyWVNhd/4EAHAeeiaDA0SJW/HgBTFGJ66DNx97d3g13vpLy0npmDHZkMPSv2bTxYZ8dTaou1DOzvYPqc8wHw93VRXFENgLjcuLwHa6KKswxDX8c2mloWEQ7a4byjm89lU6st+NFYt4DBwdE1vt0VfpavvIali7O3oqYgmu6o8l9RsV0EK+gbwO2yVXJcITeb25fTV2XH0re5Azy3y94/eUqYrU6ZtKneTu7csyu6r79sHDvrpkTy+t994i4Cg3aYqchbesgQTGml/dDzllWzcY9d6bC3t4Nj6y6Iu18Ttmur7Op7C0EwYzwbZuIMm9kysKuDvt0208ntLSXs/J6uIi/iDMW4S0sxoVD0taEB+16G/AOUTa6n9S37HjafeBEel+0QTJw2m/4tfwCgrmsaax7+QbSQdjyR4ZdMrNKZjHzutmjDrtJurKxFnk1/8ZWxt5zvWJHNnnpEOmueZoI27AqwPS6A3R+8RChse+UNU86mu2cbpttOUNoW7qPOyfD4xf52wk6+e0W1my/XVPB0Z08OIk+PysomBvttJoc3OImBbvsNRFwutmy1Sy8UF03EG7R/Ml6ZSFvbn2hoOIfGswtrnfGRiNuN1xly6d/bTjH2fuT8ATChEKFB20t3ebzRby9lk+sx4RAzT7M97LZ3nmXmiXaF2pB/gDOn3QzAu4P3UhmYOur1Z7Ilf5t1bdiVIzBgF/pyFxWz/4Bdvtbl8tA72M5vQ7aRu3xiLa+57HDFa12D1Hptw758lx2SWfXtP2Q77LTp79tNdZ2dSesq8lLkdYam/ANUTZgBwITyOtp2vQJAIGw/xOI16jUn2myPBSfGK5wztridItbidkfH2CuOasbfZ68/uD3e6DkU7vHjnWCzYsIBPy6PbfQ7N73BpKY50e2aTl0SHabx9ewj0G/35en3cKCyPUu/2ejlKkc9GdqwK6VUimzN0/xt2bVhVwD4nQk
llTXHRCfqbOp+muDkELOc6kJ37d7BXzbYHPd3+vo4vdKpf1l5sNc+lnmc3Ove3Z8QDtvfx+Xy0tW3BYCHBl5hdkVJdPuju8ffGnmR3jdAyDcQzXDZ8/FaAj7bE6+onh4dfon03AEmNszGXVwaLQId6OmKDuEc2PkB4ZBd+ri56cuEAgcvXucr7bGrvDfYY8vfeUonsqPjdQCkWCgadPOmU8D59MpydvntGPtJEyYws7Q4+vpV317GWDZ7yU3RFMVJR8+hb4/N/AgF/ZSX2hTHc4O9MAhtJbbROfvqX+cm2Bzqbf8kmtYYCvjp22ffp0n1J9C/zw6fRK5JzDreTjYaPGDPrUB/F97yWex4bxUA0+Z8nmC/Pbca5i+Ofhi8+voPKOs5+IEwnUsz/WuNSh6369qwq4PaOl6mudnmJVeU1dMf2hd97uLaaj7oszMzd/oKs6hEZCZu92Z7jcHf34XbW3rYdl9d8lBW48pHkZ46gIh72G1q554GQOe7h5fnrKo7Lno/9sLr7vdeTVeIGSWiWTFqDGjreBmAjvbXqSizPdRqMwNX2MPFtfaP2C0SbdSbSrzs8Ae4bmny67qPFaGgH79zIc/fv5+K8nqOOffKBK8aH/bveJ8pM2yD7fZ4o416z65Wpn/e9qwbOfSCcuRCctemD1m/+ufMPN6u5ji4f090qKZv5zZ6d38CwDQzD8rh+KXfyfwvdAQ0j13ltVceuonGugUA7NrzNoP+LYAtG1dTdxKn9tqG/tdblwOwsDacizBVgYhM/gr5B1j6R7vE8zMX3Mrrfc6yvn1T03Kc7/3OLg9815WPp2V/Q6Wrwy4ii4BfYotZ/6cx5p+GPH8jcBMQAnqBG4wxG0fapzbsiu7qnmjBifLSuujFwvayjfzPNvs1emZZMVfV297ZNy/4fU7izJZJs45n0qzjcx1GXmpZdO0hP09sOS7Oloeb2HIcZ7b8IlqQY8OOB/lJMwSLQ/y/9T+lBi+dRem9CD+zrJh7nryCgZDtjHz34vScu0J6Cm2I/cpzN/AFoA1YKyIrhjTcvzPG/NrZfgnwL8DwVV0c2rCrhGaWFSfeSKkk1X16AQAbnnyQYHHokOeSWdUxWXdd+Tj3PHlF2vY3VJo67J8GWo0xHwOIyDJgKRBt2I0x3THbT2D4OtSH0IZdURR20e6yGSHFPQer/XiCbs6aUMEuM/ZTGVV+cgXtSHW4yNDsWcCmZ+167EO/GYxWJNe83O2mNxRKsHXyJLVi1jUiEnsF+V5jTKRCeD2wPea5NuC0w44nchNwC+AFPpfogNqwj2ML/t32Zi6pN5w8OA2AyrImitw2E2QbawDNAlGZsfCC+3j+KVvgZU1/L73lL0Sfa2H0Dft/PmkvdDd6vDR6vGwP2I5JuoZhIlLIY+80xsSbgjzcXg7rkRtj7gbuFpErgb+Dkd8gbdhVXE2cznHnfyvXYagCdu75/wXAmt9nLlf9mxdkJnMrTUMxbUBs7ckGYMcI2y8DfpVop9qwj2NXOqdTkbgYKLHpfcWBLrzeyhxGpdTo/eOjX2Fehf3GaTBIKDO55mlc3XEt0CIizUA7cDlwSG6tiLQYYzY5P34J2EQC2rCPY3sDdsyx2uPGO2Cnyu+t3MLe0Bamu8/KZWhqnLntskeP6PXfd1Ib51WUsS8QotpjM7jO+9L9RxrasETAnYZ23RgTFJGbgeew6Y73GWPeF5HbgTeNMSuAm0VkIRAA9pNgGAZy1LCLyE+A64E9zkN/a4xZ6Tz3I+Ab2JzN7xhjnstFjOPdzMXX5ToEpUZt0aLMFMWOla5i1k7bt3LIY7fF3P+rVPeZyx77vxpjfh77gIjMxn4VOR6YBrwgIjONMem7nK2iSpyrP81FxZQU2aLOA/RT2l3GKw/dBMBZV92ds/iUymc68zR5S4Flxhgf8ImItGLzPF/PbViFyRe2F99LDpSwpWorAE29jeAGf6grl6EplZKfZWh2aTxC+maeZkIuG/abReQa4E3ge8aY/diczjUx27Q5j6ks0566UiNL11BMJmSsYReRF4C6YZ66FZuucwc2X/MO4C7gOpLM6XT2fwNwA0BTU1MaIh5/Qk5pu/LiejaHPgBgY+kWZg9MP2S7ZU9dHb2fzpmBSo1VQnounmZKxhp2Y8zCZLYTkd8ATzs/Jp3T6czcuhdg/vz5Y7sEfI6tkg3M9tplA3xhQ3vF9gSvSN1/LbcZXF8vwNUg1fikhTaGEJGpxpidzo8XAhuc+yuA34nIv2AvnrYAb+QgxHHh1q/YFLOfPXbZiNtpL12pw8l4HIpJ4E4RmYsdZtkCfAvAyd98BLsAThC4STNilFL5xk5QynUU8eWkYTfGfHWE5/4B+IcshjPuff+SzC/Dq0MwqtBouqNSShUU0aEYpZQqJIL22JVSquCI0R67UkoVljxOstaGXSmlRkG0YVdKqUKjQzFKKVU4DEg410HEpw27UkqNhl48VUqpwqJj7EopVWi0YVdKqcIh5Hceez5PnlJKqfxkUrglICKLROQjEWkVkR8O8/wtIrJRRN4VkVUicnSifWrDrpRSoyAmuduI+xBxA3cDXwRmA1c4tZ9jvQ3MN8acCDwG3JkoNm3YlVIqZWKzYpK5jezTQKsx5mNjjB9Yhq39HGWMeckY0+/8uAZbgGhE2rArpdQopKPHjq3pHFuyLFGd528AzyTaqV48VUqp0Ug+K6ZGRN6M+flep7QnpFbn+WpgPnBOogNqw66UUqkyKWXFdBpj5sd5Lqk6zyKyELgVOMcY40t0QB2KUUqp0UhPVsxaoEVEmkXEC1yOrf0cJSLzgHuAJcaY3cmEpj12pZQahXTksRtjgiJyM/Ac4Abuc2o/3w68aYxZAfwMKAceFRGAbcaYJSPtVxt2pZQajTTNPDXGrARWDnnstpj7C1PdpzbsSimVIjvzNH9HsrVhV0qplAl2blF+0oZdKaVGQfI490QbdqWUSpngEk+ug4hLG3allEqRIAg6FJNR69at6xSRrRnafQ3QmaF9j5bGlByNKTnjLaaEqyMmQ0SHYjLKGFObqX2LyJsjzBrLCY0pORpTcjSm0dCLp0opVVgEXNpjV0qpwiHaYx/z7k28SdZpTMnRmJKjMaVMcEn+Np/5G1meiFleM29oTMnRmJKjMY2O9tiVUqqgiGbFKKVUIRHJ7x57/n7k5ICIXCoi74tIWETmxzw+XUQGRGS9c/t1zHOniMh7ToXxfxNnXc1Mx+Q89yPnuB+JyHkxj49Y9TzN8f1ERNpj3pvFieLLhmy+Bwni2OKcH+sjVXREpFpE/igim5x/J2U4hvtEZLeIbIh5bNgYxPo35317V0ROzmJMeXkuDc/22JO55YI27IfaAFwEvDzMc5uNMXOd240xj/8KuAFocW6LshGTU8n8cuB455j/V0TcSVY9T7d/jXlvVo4UX4bjwDl2Lt6DkfyF895EPph/CKwyxrQAq5yfM+l+Dj8v48XwRQ6eyzdgz+9sxQR5di6NxCXupG45iS0nR81TxpgPjDEfJbu9iEwFKo0xrxtjDPAgcEGWYloKLDPG+Iwxnw
Ct2IrnCaueZ0m8+LIhX96DeJYCDzj3HyDN58xQxpiXgX1JxrAUeNBYa4Aq5zzPRkzx5PJcGpYguFyepG65oA178ppF5G0R+ZOInOU8Vo+tWRiRqMJ4OsWrbp5q1fN0uNn52n5fzLBCLuKIyOWxhzLA8yKyTkRucB47yhizE8D5d0oO4ooXQ67fu3w7l+IQBFdSt1wYdxdPReQFoG6Yp241xiyP87KdQJMxZq+InAI8KSLHk0KF8QzEFO/Yw51JR1TrZaT4sF/V73COcQdwF3DdCPFlQy6PPdSZxpgdIjIF+KOIfJijOJKVy/cuH8+l4YmuFZNXRlNmyqkK7nPurxORzcBMbM+hIWbTYSuMZyImRq5unrDqeSqSjU9EfgM8nUR8mZbLYx/CGLPD+Xe3iDyBHULYJSJTjTE7nWGOpAoUp1m8GHL23hljdkXu59G5FIcgrpwP88eVvx85eUREaiMXa0TkGOyFpY+dr7A9InK6kw1zDRCvh51uK4DLRaRYRJqdmN4giarn6TRk/PVC7MXekeLLhqy+B/GIyAQRqYjcB87Fvj8rgGudza4le+dMrHgxrACucbJjTge6IkM2mZan51Jc6cqKSZTBJSJni8hbIhIUkUuSiW3c9dhHIiIXAv8O1AJ/EJH1xpjzgLOB20UkCISAG40xkQs/38Ze4S8FnnFuGY/JqWT+CLARCAI3GWNCzmsOq3qezpiGuFNE5mK/Gm8BvgUwUnyZFq/yezaOPcRRwBP2M58i4HfGmGdFZC3wiIh8A9gGXJrJIETkYWABUCMibcCPgX+KE8NKYDH2AmU/8PUsxrQg386leEQEl/vIL4zGZHB9AfvNZK2IrDDGbIzZbBvwNeCvk96vTeZQSimVrDkzjzHL/+OnSW0747yr1sVbglhEzgB+4nQgEZEfARhj/nGYbe8HnjbGPJbomNpjV0qpURBXWkayh8v4Oe1Id6oNu1JKpUgkpWV7a8SZdey4N2aRs4xk/GjDrpRSnMRVrAAAAgRJREFUKUupYe8coRpURjJ+tGFXSqlRSFO6YzSDC2jHZnBdeaQ71XRHpZRKlQiuIm9St5EYY4JAJIPrA+ARJwvodhFZYg8lpzqZQ5cC94hIwgwv7bGrMU1Eeo0x5bmOQ40vksYJSs5iZyuHPHZbzP21HDoRMiFt2JVSKlUiuHTmqVKZ5cyU/JmIbBC7/vllzuMLRGS1iDwmIh+KyEPOLGGljoi43EndckF77KpQXATMBU4CarAz+CJr2M/DruW9A3gNOBN4NRdBqgIhulaMUtnwWeBhY0zIWUzqT8CpznNvGGPajDFhYD0wPUcxqgIhaI9dqWwYaXjFF3M/hJ736kiJK2HGSy5pj10VipeBy5zygLXYhdtyvgKgKlzaY1cq854AzgDewU7J/htjTIeIHJfbsFRByvMxdm3Y1ZgWyWF3as5+37nFPr8aWB3z881ZDE8VKAHErQ27UkoVjjzPY9eGXSmlUiRIXl881YZdKaVSJaJDMUopVVBEx9iVUqrAaI9dKaUKiw7FKKVUYYksKZCvtGFXSqlUuVy4vZoVo5RSBUWHYpRSqoBIno+xi52JrZRSKlki8ix23f9kdBpjFmUynqG0YVdKqQKjy/YqpVSB0YZdKaUKjDbsSilVYLRhV0qpAqMNu1JKFRht2JVSqsBow66UUgVGG3allCow2rArpVSB+f8foHz4lThd4wAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# dim = (start, stop, step)\n", - "lat = podpac.crange( 90, -90,-2.0)\n", - "lon = podpac.crange( -180, 180, 2.0)\n", - "# dim = value\n", - "time = '2018-05-19T12:00:00'\n", - "\n", - "c_world = podpac.Coordinates([lat, lon, time], dims=['lat', 'lon', 'time'])\n", - "o = sm.eval(c_world)\n", - "figure()\n", - "o.plot(cmap='gist_earth_r')\n", - "axis('scaled')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/doc/serve-docs.sh b/doc/serve-docs.sh index f0eab15e8..ac439d9e7 100755 --- a/doc/serve-docs.sh +++ b/doc/serve-docs.sh @@ -8,4 +8,4 @@ if [ "$1" == "clean" ]; then fi # build sphinx-docs -sphinx-autobuild -b html -i source/example-links.inc -i source/api -i source/changelog.md source build +sphinx-autobuild -b html -i source/example-links.inc -i source/api/\*.rst -i source/changelog.md source build diff --git a/doc/source/_static/img/node.png b/doc/source/_static/img/node.png index 263ac35eb..97f984c07 100644 Binary files a/doc/source/_static/img/node.png and b/doc/source/_static/img/node.png differ diff --git a/doc/source/api.rst b/doc/source/api.rst index ea35689af..fe6be16a3 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -70,6 +70,7 @@ Generic data source wrappers podpac.data.Array podpac.data.CSV + podpac.data.Dataset podpac.data.H5PY 
podpac.data.PyDAP podpac.data.Rasterio @@ -86,8 +87,8 @@ Generic data source wrappers podpac.data.DataSource podpac.data.Interpolation - podpac.data.INTERPOLATION_SHORTCUTS podpac.data.INTERPOLATION_DEFAULT + podpac.data.INTERPOLATION_METHODS Interpolators @@ -107,29 +108,6 @@ Classes to manage interpolation podpac.interpolators.ScipyPoint -Pipelines ---------- - -.. autosummary:: - :toctree: api/ - :template: class.rst - - podpac.pipeline.Pipeline - podpac.pipeline.PipelineError - -.. rubric:: Pipeline Outputs - -.. autosummary:: - :toctree: api/ - :template: class.rst - - podpac.pipeline.Output - podpac.pipeline.NoOutput - podpac.pipeline.FileOutput - podpac.pipeline.FTPOutput - podpac.pipeline.S3Output - podpac.pipeline.ImageOutput - Algorithm Nodes --------------- @@ -199,8 +177,9 @@ Stitch multiple data sources together :toctree: api/ :template: class.rst - podpac.compositor.Compositor podpac.compositor.OrderedCompositor + podpac.compositor.UniformTileCompositor + podpac.compositor.UniformTileMixin Datalib @@ -212,7 +191,6 @@ Interfaces to external data sources :toctree: api/ :template: class.rst - podpac.datalib.smap podpac.datalib.SMAP podpac.datalib.SMAPBestAvailable podpac.datalib.SMAPSource @@ -242,8 +220,8 @@ Utilities :toctree: api/ :template: class.rst - podpac.authentication.SessionWithHeaderRedirection - podpac.authentication.EarthDataSession + podpac.authentication.RequestsSessionMixin + podpac.authentication.S3Mixin .. rubric:: Settings @@ -263,6 +241,19 @@ Utilities podpac.utils.create_logfile podpac.utils.clear_cache + podpac.utils.cached_property + podpac.utils.NoCacheMixin + podpac.utils.DiskCacheMixin + podpac.utils.NodeTrait + + +.. rubric:: Style + +.. autosummary:: + :toctree: api/ + :template: module.rst + + podpac.style.Style .. rubric:: Version diff --git a/doc/source/aws-development.md b/doc/source/aws-development.md index 034e7602b..4db7e16f7 100644 --- a/doc/source/aws-development.md +++ b/doc/source/aws-development.md @@ -1,27 +1,80 @@ # AWS Development -This document describes more details on using the AWS functionality of PODPAC. +This document provides details on how PODPAC integrates with AWS services for serverless cloud processing. +See the [AWS Quick Start Guide](aws.html) for a quick guide to building and using AWS services with PODPAC. ## AWS Architecture -All files related to creating a Lambda function are in `dist/aws`. The `DockerFile` is based on Amazon's EC2 DockerHub distribution, and creates a Podpac-friendly python 3.6 environment. A `.zip` file is extracted from this environment, which can be used to create a Lambda function. Conveniently, developers can also use this to create an EC2 instance, or work directly in the Docker container. +## Creating PODPAC resources for AWS -Our `handler.py` expects the Lambda event to include a pipeline definition in the form of (URI encoded) JSON. The handler then executes that pipeline accordingly. However, developers are encouraged to write their own handlers as needed. +> [Docker](https://www.docker.com/) is required for creating PODPAC resources for AWS services. -## Creating Your Own Podpac Lambda Function +All files related to creating PODPAC resources for AWS live in [`dist/aws`](https://github.com/creare-com/podpac/tree/master/dist/aws). -We're now set up to create an AWS Lambda function "out of the box". 
Assuming you've installed Docker, here are the steps to create a Lambda function:
+- `handler.py`: [AWS Lambda function handler](https://docs.aws.amazon.com/lambda/latest/dg/python-programming-model-handler-types.html). Handles PODPAC Lambda trigger event, executes the pipeline, and returns the result back to the source of the trigger. Developers can override the default function handler for a [`Lambda`](api/podpac.managers.Lambda.html) Node using the [`function_handler`](api/podpac.managers.Lambda.html#podpac.managers.Lambda.function_handler) attribute.
+- `DockerFile`: Docker instructions for creating PODPAC deployment package and dependencies using [Amazon's EC2 DockerHub](https://hub.docker.com/_/amazonlinux/) distribution.
+- `build_lambda.sh`: Bash script to build PODPAC deployment package and dependencies using [Docker](https://www.docker.com/). Outputs `podpac_dist.zip` and `podpac_deps.zip` in the `dist/aws` directory.
+    - `podpac_dist.zip`: PODPAC [deployment package](https://docs.aws.amazon.com/lambda/latest/dg/gettingstarted-features.html#gettingstarted-features-package) ready to create a Lambda function.
+    - `podpac_deps.zip`: PODPAC dependencies that are hosted on S3 and dynamically extracted during Lambda function execution. These files are separate from `podpac_dist.zip` to circumvent the space limitations of AWS Lambda functions.
+- `upload_lambda.sh`: Convenience script to upload deployment package and dependencies to an S3 bucket.

-- Run `docker build -f DockerFile --tag $NAME:$TAG .` from the `dist/aws` directory
-- Create a Lambda using the resulting `podpac:latest/tmp/package.zip`
-    - For example, we've chosen to do this as follows:
-    - ```bash
-      docker run --name lambda -itd $NAME:$TAG
-      docker cp lambda:/tmp/package.zip package.zip
-      docker stop lambda
-      docker rm lambda
-      ```
-    - Upload package.zip (~67 MB) to S3.
-    - Create a Lambda function from the AWS developer console
-    - Copy the link address of package.zip from its S3 bucket, paste into the Lambda's "Function code" field
-    - Set up any other Lambda properties you'd like. We use S3 triggers - the handler is triggered when pipeline JSON is uploaded to our S3 bucket
+To create a custom deployment package or dependencies package:
+
+- Edit the `Dockerfile` or `handler.py` with desired changes
+    - To build using the local copy of the PODPAC repository, see comment in `Dockerfile` at ~L36.
+- Build the deployment package and dependencies
+
+```bash
+$ bash build_lambda.sh
+Creating docker image from podpac version master
+podpac:master
+...
+Built podpac deployment package: podpac_dist.zip
+Built podpac dependencies: podpac_deps.zip
+```
+
+- You can now use PODPAC to create a function from these local resources:
+
+```python
+import podpac
+from podpac import settings
+
+# configure settings
+settings["AWS_ACCESS_KEY_ID"] = "access key id"
+settings["AWS_SECRET_ACCESS_KEY"] = "secret access key"
+settings["AWS_REGION_NAME"] = "region name"
+settings["S3_BUCKET_NAME"] = "bucket name"
+settings["FUNCTION_NAME"] = "function name"
+settings["FUNCTION_ROLE_NAME"] = "role name"
+
+# define node
+node = podpac.managers.aws.Lambda(function_source_dist_zip="dist/aws/podpac_dist.zip",
+                                  function_source_dependencies_zip="dist/aws/podpac_deps.zip"
+                                  )
+
+# build AWS resources
+node.build()
+```
+
+- You can also upload `podpac_dist.zip` and `podpac_deps.zip` to a public or user-accessible S3 bucket and build PODPAC functions from the remote bucket. The bash script `upload_lambda.sh` can do this for you if the `BUCKET` variable is customized.
+We'll assume you copy the files to `s3://my-bucket/directory/podpac_dist.zip` and `s3://my-bucket/directory/podpac_deps.zip`:
+
+```python
+import podpac
+from podpac import settings
+
+# configure settings
+settings["AWS_ACCESS_KEY_ID"] = "access key id"
+settings["AWS_SECRET_ACCESS_KEY"] = "secret access key"
+settings["AWS_REGION_NAME"] = "region name"
+settings["S3_BUCKET_NAME"] = "bucket name"
+settings["FUNCTION_NAME"] = "function name"
+settings["FUNCTION_ROLE_NAME"] = "role name"
+
+# define node
+node = podpac.managers.aws.Lambda(function_source_bucket="my-bucket",
+                                  function_source_dist_key="directory/podpac_dist.zip",
+                                  function_source_dependencies_key="directory/podpac_deps.zip"
+                                  )
+
+# build AWS resources
+node.build()
+```
diff --git a/doc/source/aws.md b/doc/source/aws.md
index 0a0844f61..911a204da 100644
--- a/doc/source/aws.md
+++ b/doc/source/aws.md
@@ -1,5 +1,22 @@
# AWS Integration

-PODPAC integrates with AWS to enable processing in the cloud.
+PODPAC integrates with AWS to enable processing in the cloud. To process on the cloud you need to:

-> This document is under construction. See the [AWS Lambda Tutorial Notebook](https://github.com/creare-com/podpac-examples/blob/master/notebooks/developer/aws-lambda-tutorial.ipynb) for more details.
+1. Obtain an AWS account
+2. Generate and save the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` (see [AWS documentation](https://aws.amazon.com/blogs/security/wheres-my-secret-access-key/))
+3. Build the necessary AWS resources using PODPAC (see the [Setting up AWS Lambda Tutorial Notebook](https://github.com/creare-com/podpac-examples/blob/master/notebooks/4-advanced/aws-lambda.ipynb))
+
+After these steps, nearly any PODPAC processing pipeline can be evaluated using AWS Lambda functions.
+
+```python
+import podpac
+...
+output = node.eval(coords)  # Local evaluation of node
+cloud_node = podpac.managers.Lambda(source=node)
+cloud_output = cloud_node.eval(coords)
+```
+
+This functionality is documented in the following notebooks:
+* [Running on AWS Lambda Tutorial Notebook](https://github.com/creare-com/podpac-examples/blob/master/notebooks/3-processing/running-on-aws-lambda.ipynb)
+* [Setting up AWS Lambda Tutorial Notebook](https://github.com/creare-com/podpac-examples/blob/master/notebooks/4-advanced/aws-lambda.ipynb)
+* [Budgeting with AWS Lambda Tutorial Notebook](https://github.com/creare-com/podpac-examples/blob/master/notebooks/4-advanced/aws-budget.ipynb)
diff --git a/doc/source/cache.md b/doc/source/cache.md
index b9dcf2fd3..a9977ef70 100644
--- a/doc/source/cache.md
+++ b/doc/source/cache.md
@@ -55,16 +55,18 @@ smap = podpac.datalib.smap.SMAP(cache_output=True)
Different instances of the same node share a cache. For example:

```python
->>> coords = podpac.Coordinates([podpac.clinspace(40, 39, 16),
+[.] import podpac
+[.] import podpac.datalib
+[.] coords = podpac.Coordinates([podpac.clinspace(40, 39, 16),
                                  podpac.clinspace(-100, -90, 16),
                                  '2015-01-01T00',
                                  ['lat', 'lon', 'time']])
->>> smap1 = podpac.datalib.smap.SMAP()
->>> o = smap1.eval(coords)
->>> smap1._from_cache
+[.] smap1 = podpac.datalib.smap.SMAP()
+[.] o = smap1.eval(coords)
+[.] smap1._from_cache
False
->>> del smap1
->>> smap2 = podpac.datalib.smap.SMAP()
->>> o = smap2.eval(coords)
->>> smap2._from_cache
+[.] del smap1
+[.] smap2 = podpac.datalib.smap.SMAP()
+[.] o = smap2.eval(coords)
+[.]
smap2._from_cache True ``` diff --git a/doc/source/conf.py b/doc/source/conf.py index 081d05495..d5d479b72 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -227,7 +227,7 @@ def generate_example_links(): prestring = "- " string = "\n".join( [ - prestring + " `{} <{}>`__".format(f, base_link + "/" + f) + prestring + " `{} <{}>`_".format(f.split('.ipynb')[0].replace('-', ' ').capitalize(), base_link + "/" + f) for f in files if f.endswith("ipynb") ] @@ -237,7 +237,7 @@ def generate_example_links(): f for f in files if os.path.isdir(os.path.join(nbpath, f)) - and f not in [".ipynb_checkpoints", "Images", "old_examples", "presentations"] + and f not in [".ipynb_checkpoints", "developer", "__pycache__"] ] subdirs.sort() for sd in subdirs: @@ -245,11 +245,11 @@ def generate_example_links(): link = base_link + "/" + sd fs = os.listdir(path) fs.sort() - string += "\n- {}\n".format(sd) + string += "\n- {}\n".format(sd.replace('-', ' ').title()) prestring = " -" string += "\n".join( [ - prestring + " `{} <{}>`__".format(f, link + "/" + f) + prestring + " `{} <{}>`_".format(f.split('.ipynb')[0].replace('-', ' ').capitalize(), link + "/" + f) for f in fs if f.endswith("ipynb") ] diff --git a/doc/source/coordinates.md b/doc/source/coordinates.md index 92748bbff..5ea9e293c 100644 --- a/doc/source/coordinates.md +++ b/doc/source/coordinates.md @@ -26,26 +26,28 @@ are `'lat'`, `'lon'`, `'time'`, and `'alt'`. Unstacked multidimensional coordinates form a grid of points. For example, the following Coordinates contain three dimensions and a total of 24 points. ``` ->>> lat = [0, 1, 2] ->>> lon = [10, 20, 30, 40] ->>> time = ['2018-01-01', '2018-01-02'] ->>> Coordinates([lat, lon], dims=['lat', 'lon']) +[.] from podpac import Coordinates +[.] lat = [0, 1, 2] +[.] lon = [10, 20, 30, 40] +[.] time = ['2018-01-01', '2018-01-02'] +[.] Coordinates([lat, lon], dims=['lat', 'lon']) Coordinates - lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4], ctype['midpoint'] ->>> Coordinates([lat, lon, time], dims=['lat', 'lon', 'time']) + lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4] +[.] Coordinates([lat, lon, time], dims=['lat', 'lon', 'time']) Coordinates - lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4], ctype['midpoint'] - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2] ``` You can also create coordinates with just one dimension the same way: ``` +>>> from podpac import Coordinates >>> Coordinates([time], dims=['time']) Coordinates - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2], ctype['midpoint'] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2] ``` ### Stacked Coordinates @@ -57,31 +59,33 @@ that the name for this stacked dimension is 'lat_lon', using an underscore to co The following example has a single stacked dimension and a total of 3 points. ``` ->>> lat = [0, 1, 2] ->>> lon = [10, 20, 30] ->>> c = Coordinates([[lat, lon]], dims=['lat_lon']) ->>> c +[.] from podpac import Coordinates +[.] lat = [0, 1, 2] +[.] lon = [10, 20, 30] +[.] c = Coordinates([[lat, lon]], dims=['lat_lon']) +[.] 
c Coordinates - lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3], ctype['midpoint'] ->>> c['lat_lon'].coordinates[0] + lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3] +[.] c['lat_lon'].coordinates[0] (0.0, 10.0) ``` Coordinates can combine stacked dimensions and unstacked dimensions. For example, in the following Coordinates the `(lat, lon)` values and the `time` values form a grid of 6 total points. ``` ->>> lat = [0, 1, 2] ->>> lon = [10, 20, 30] ->>> time = ['2018-01-01', '2018-01-02'] ->>> c = Coordinates([[lat, lon], time], dims=['lat_lon', 'time']) +[.] from podpac import Coordinates +[.] lat = [0, 1, 2] +[.] lon = [10, 20, 30] +[.] time = ['2018-01-01', '2018-01-02'] +[.] c = Coordinates([[lat, lon], time], dims=['lat_lon', 'time']) Coordinates - lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3], ctype['midpoint'] - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2], ctype['midpoint'] ->>> c['lat_lon'].coordinates[0] + lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2] +[.] c['lat_lon'].coordinates[0] (0.0, 10.0) ->>> c['time'].coordinates[0] +[.] c['time'].coordinates[0] numpy.datetime64('2018-01-01') ``` @@ -98,18 +102,13 @@ Unlike `np.arange`: * the stop value will be included in the coordinates if it falls an exact number of steps from the start ``` +>>> import podpac >>> c = podpac.crange(0, 7, 2) >>> c.coordinates array([0., 2., 4., 6.]) -``` - -``` >>> c = podpac.crange(0, 8, 2) >>> c.coordinates array([0., 2., 4., 6., 8.]) -``` - -``` >>> c = podpac.crange('2018-01-01', '2018-03-01', '1,M') >>> c.coordinates array(['2018-01-01', '2018-02-01', '2018-03-01'], dtype='datetime64[D]') @@ -124,17 +123,13 @@ Unlike `np.linspace`: * tuple inputs are supported for stacked coordinates ``` +>>> import podpac >>> c = podpac.clinspace(0, 8, 5) >>> c.coordinates array([0., 2., 4., 6., 8.]) -``` - -```>>> c = podpac.clinspace('2018-01-01', '2018-03-01', 3) +>>> c = podpac.clinspace('2018-01-01', '2018-03-01', 3) >>> c.coordinates array(['2018-01-01', '2018-01-30', '2018-02-28'], dtype='datetime64[D]') -``` - -``` >>> c = podpac.clinspace((0, 10), (1, 20), 3) >>> c.coordinates MultiIndex(levels=[[0.0, 0.5, 1.0], [10.0, 15.0, 20.0]], @@ -148,28 +143,26 @@ extremely large number of points. 
TODO -### Coordinate Properties - -TODO ctype, etc - ### Alternate Constructors Unstacked coordinates can also be created using the `Coordinates.grid` alternate constructor: ``` +>>> from podpac import Coordinates >>> Coordinates.grid(lat=[0, 1, 2], lon=[10, 20, 30, 40]) Coordinates - lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4] ``` Stacked coordinates can be created using the `Coordinates.points` alternate constructor: ``` +>>> from podpac import Coordinates >>> Coordinates.points(lat=[0, 1, 2], lon=[10, 20, 30]) Coordinates - lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint'] - lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3], ctype['midpoint'] + lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3] + lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3] ``` For convenience, a tuple can be used to generate uniformly-spaced coordinates. If the third item is an integer, it @@ -194,18 +187,17 @@ Coordinates.points(lat=[0, 1, 2], lon=[10, 20, 30], order=['lat', 'lon']) TODO ``` +from podpac.coordinates import UniformCoordinates1d, ArrayCoordinates1d, Coordinates, StackedCoordinates >>> lat = UniformCoordinates1d(0, 1, size=100, name='lat') >>> lon = UniformCoordinates1d(10, 20, size=100, name='lon') >>> time = ArrayCoordinates1d(['2018-01-01', '2018-02-03'], name='time') >>> Coordinates([StackedCoordinates([lat, lon]), time]) Coordinates - lat_lon[lat]: UniformCoordinates1d(lat): Bounds[0.0, 1.0], N[100], ctype['midpoint'] - lat_lon[lon]: UniformCoordinates1d(lon): Bounds[10.0, 20.0], N[100], ctype['midpoint'] - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-02-03], N[2], ctype['midpoint'] + lat_lon[lat]: UniformCoordinates1d(lat): Bounds[0.0, 1.0], N[100] + lat_lon[lon]: UniformCoordinates1d(lon): Bounds[10.0, 20.0], N[100] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-02-03], N[2] ``` -TODO mixed ctypes, etc... - ## Coordinate API TODO @@ -213,6 +205,7 @@ TODO Coordinates contain some useful properties relating to its dimensions and underlying coordinate values. ``` +>>> from podpac import Coordinates >>> c = Coordinates([lat, lon, time], dims=['lat', 'lon', 'time']) >>> c.ndims >>> c.dims diff --git a/doc/source/datasets.md b/doc/source/datasets.md index 69ec15a18..467ec368f 100644 --- a/doc/source/datasets.md +++ b/doc/source/datasets.md @@ -6,18 +6,18 @@ continue to expand support each release. The following datasets are currently su ## SMAP - **Source**: [NASA Soil Moisture Active Passive (SMAP) Satellites](https://smap.jpl.nasa.gov/data/) -- **Module**: `podpac.datalib.smap` +- **Module**: `podpac.datalib.smap`, `podpac.datalib.smap_egi` Global soil moisture measurements from NASA. 
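
A minimal usage sketch, assuming an Earthdata account has already been configured (the coordinate values and the `<username>`/`<password>` placeholders below are illustrative):

```python
import podpac
from podpac.datalib import SMAP

# small lat/lon grid at a single time (placeholder values)
coords = podpac.Coordinates(
    [podpac.clinspace(40, 39, 16), podpac.clinspace(-100, -90, 16), "2015-01-01T00"],
    dims=["lat", "lon", "time"],
)

node = SMAP()
node.set_credentials(username="<username>", password="<password>")  # Earthdata login
output = node.eval(coords)  # soil moisture values interpolated to `coords`
```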
### Examples -- [Analyzing SMAP Data](https://github.com/creare-com/podpac-examples/blob/master/notebooks/basic_examples/analyzing-SMAP-data.ipynb) -- [Running SMAP Analysis on AWS Lambda](https://github.com/creare-com/podpac-examples/blob/master/notebooks/basic_examples/running-on-aws-lambda.ipynb) -- [SMAP Sentinel data access](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/SMAP-Sentinel-data-access.ipynb) -- [SMAP downscaling example application](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/SMAP-downscaling-example-application.ipynb) -- [SMAP level 4 data access](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/SMAP-level4-data-access.ipynb) -- [SMAP data access widget](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/SMAP-widget-data-access.ipynb) +- [Retrieving SMAP Data](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/010-retrieving-SMAP-data.ipynb) +- [Analyzing SMAP Data](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/100-analyzing-SMAP-data.ipynb) +- [Working with SMAP-Sentinel Data](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/101-working-with-SMAP-Sentinel-data.ipynb) +- [SMAP-EGI](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/SMAP-EGI.ipynb) +- [SMAP Data Access Without PODPAC](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/SMAP-data-access-without-podpac.ipynb) +- [SMAP Downscaling Example Application](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/smap/SMAP-downscaling-example-application.ipynb) ## TerrainTiles @@ -28,8 +28,8 @@ Global dataset providing bare-earth terrain heights, tiled for easy usage and pr ### Examples -- [Terrain Tiles Usage](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/Terrain-Tiles.ipynb) -- [Terrain Tiles Pattern Match](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/Terrain-Tiles-Pattern-Match.ipynb) +- [Terrain Tiles Usage](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/terrtain-tiles.ipynb) +- [Terrain Tiles Pattern Match](https://github.com/creare-com/podpac-examples/blob/master/notebooks/scratch/demos/Terrain-Tiles-Pattern-Match.ipynb) ## GFS @@ -40,4 +40,4 @@ Weather forecast model produced by the National Centers for Environmental Predic ### Examples -- [GFS Usage](https://github.com/creare-com/podpac-examples/blob/master/notebooks/demos/gfs.ipynb) \ No newline at end of file +- [GFS Usage](https://github.com/creare-com/podpac-examples/blob/master/notebooks/5-datalib/gfs.ipynb) \ No newline at end of file diff --git a/doc/source/dependencies.md b/doc/source/dependencies.md index ac299748a..f84605b46 100644 --- a/doc/source/dependencies.md +++ b/doc/source/dependencies.md @@ -4,8 +4,7 @@ This document provides an overview of the dependencies leveraged by PODPAC. 
## Requirements -- Python (3.6 or later) - - We suggest you use the the [Anaconda Python Distribution](https://www.anaconda.com/) +- [Python](https://www.python.org/) (3.6 or later) — [Anaconda Python Distribution](https://www.anaconda.com/distribution/#download-section) recommended ## OS Specific Requirements @@ -13,11 +12,11 @@ If using `pip` to install, the following OS specific dependencies are required t ### Windows -> No external dependencies necessary +> No external dependencies necessary, though using Anaconda is recommended. ### Mac -> No external dependencies necessary +> No external dependencies necessary, though using Anaconda is recommended. ### Linux @@ -33,31 +32,46 @@ $ sudo apt-get install build-essential python-dev ## Core Dependencies -> See [requirements.txt](https://github.com/creare-com/podpac/blob/develop/requirements.txt) and [setup.py](https://github.com/creare-com/podpac/blob/develop/setup.py) for the latest dependencies listing. +> See [setup.py](https://github.com/creare-com/podpac/blob/master/setup.py) for the latest dependencies listing. + + "matplotlib>=2.1", + "numpy>=1.14", + "pint>=0.8", + "scipy>=1.0", + "traitlets>=4.3", + "xarray>=0.10", + "requests>=2.18", + "pyproj>=2.4", + "lazy-import>=0.2.2", + "psutil", -- Python 2.7\[[1](#f1)\], 3.5, 3.6, or 3.7 - - We suggest you use the the [Anaconda Python Distribution](https://www.anaconda.com/) - [numpy](http://www.numpy.org/), [scipy](https://www.scipy.org/), [xarray](http://xarray.pydata.org/en/stable/): array handling - [traitlets](https://github.com/ipython/traitlets): input and type handling - [pint](https://pint.readthedocs.io/en/latest/): unit handling - [requests](http://docs.python-requests.org/en/master/): HTTP requests +- [matplotlib](https://matplotlib.org/): plotting +- [pyproj](http://pyproj4.github.io/pyproj/stable/): coordinate reference system handling +- [psutil](https://psutil.readthedocs.io/en/latest/): cache management + ## Optional Dependencies -> Optional dependencies can be [installed using `pip`](insatllation.html#installing-via-pip) +> Optional dependencies can be [installed using `pip`](install.html#install-with-pip) -- Data Handling +- `datatype`: Data Handling - [h5py](https://www.h5py.org/): interface to the HDF5 data format - [pydap](http://www.pydap.org/en/latest/): python support for Data Access Protocol (OPeNDAP) - [rasterio](https://github.com/mapbox/rasterio): read GeoTiff and other raster datasets - [lxml](https://github.com/lxml/lxml): read xml and html files - [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/): text parser and screen scraper -- AWS + - [zarr](https://zarr.readthedocs.io/en/stable/): cloud optimized storage format +- `aws`: AWS integration - [awscli](https://github.com/aws/aws-cli): unified command line interface to Amazon Web Services - [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html): Amazon Web Services (AWS) SDK for Python -- Algorithms + - [s3fs](https://pypi.org/project/s3fs/): Convenient Filesystem interface over S3. 
+- `algorithm`: Algorithm development
    - [numexpr](https://github.com/pydata/numexpr): fast numerical expression evaluator for NumPy
-- Notebook
+- `notebook`: Jupyter Notebooks
    - [jupyterlab](https://github.com/jupyterlab/jupyterlab): extensible environment for interactive and reproducible computing
    - [ipyleaflet](https://github.com/jupyter-widgets/ipyleaflet): [LeafletJS](https://leafletjs.com/) interface for jupyter notebooks
    - [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/): interactive widgets for Jupyter notebooks
diff --git a/doc/source/deploy-notes.md b/doc/source/deploy-notes.md
index 79caf955a..9a316574b 100644
--- a/doc/source/deploy-notes.md
+++ b/doc/source/deploy-notes.md
@@ -58,12 +58,12 @@ $ bin\activate_podpac_conda_env.bat

# Install core dependencies
$ conda install matplotlib>=2.1 numpy>=1.14 scipy>=1.0 traitlets>=4.3 xarray>=0.10 ipython psutil requests>=2.18
-$ conda install pyproj>=2.2 rasterio>=1.0
+$ conda install pyproj>=2.2 rasterio>=1.0 -c conda-forge
$ pip install pint>=0.8 lazy-import>=0.2.2

# Install dependencies for handling various file datatype
$ # conda install rasterio>=1.0  # Installed above alongside pyproj
-$ conda install beautifulsoup4>=4.6 h5py>=2.9 lxml>=4.2 zarr>=2.3
+$ conda install beautifulsoup4>=4.6 h5py>=2.9 lxml>=4.2 zarr>=2.3 intake>=0.5
$ pip install pydap>=3.2

# Install dependencies for AWS
diff --git a/doc/source/design.rst b/doc/source/design.rst
index ac585ec25..c69f30117 100644
--- a/doc/source/design.rst
+++ b/doc/source/design.rst
@@ -61,7 +61,8 @@ Pipelines can also be complex, like two data sources being combined into an algo

.. image:: /_static/img/complex-pipeline.png
    :width: 85%

-
+Pipelines are not explicitly implemented, but this functionality is available through `Nodes`. To see the representation of
+a pipeline use `Node.definition`. To create a pipeline from a definition use `Node.from_definition(definition)`.
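
For illustration, the round trip looks roughly like this (a minimal sketch; the `Array` node and data values are placeholder choices, and only `Node.definition` and `Node.from_definition` are the interfaces named above):

```python
import podpac

# placeholder node: a small in-memory array data source
coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30]], dims=["lat", "lon"])
node = podpac.data.Array(source=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], coordinates=coords)

definition = node.definition                       # serializable pipeline definition of the node graph
rebuilt = podpac.Node.from_definition(definition)  # recreate an equivalent node from the definition
```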
Repository Organization
-----------------------
@@ -70,9 +71,6 @@ The directory structure is as follows:

- ``dist``: Contains installation instructions and environments for various deployments, including cloud deployment on AWS
- ``doc``: Sphinx based documentation
-    - ``source``: sphinx docs source
-    - ``notebooks``: example jupyter notebooks
-- ``html``: HTML pages used for demonstrations
- ``podpac``: The PODPAC Python library
    - ``core``: The core PODPAC functionality -- contains general implementation so of classes
    - ``datalib``: Library of Nodes used to access specific data sources -- this is where the SMAP node is implemented (for example)
diff --git a/doc/source/docs.md b/doc/source/docs.md
index 38e3a7b5c..50db1f2de 100644
--- a/doc/source/docs.md
+++ b/doc/source/docs.md
@@ -7,8 +7,8 @@ The following sections outlines how to develop and build the `podpac` documentat
- Install **Sphinx** and the **Read the Docs** theme

```bash
-$ conda install sphinx # or `pip install sphinx`
-$ conda install sphinx_rtd_theme # or `pip install sphinx-rtd-theme`
+$ pip install sphinx
+$ pip install sphinx_rtd_theme
```

- Install `recommonmark` to support markdown input files
@@ -21,9 +21,11 @@ $ pip install recommonmark

To run the doctests in the documentation, run from the `/doc` directory:

-```
-$ ./test-docs.sh # convience script
-or
+```bash
+$ ./test-docs.sh # convenience script
+
+# or
+
$ sphinx-build -b doctest source build # run tests manually
```

@@ -34,8 +36,10 @@ $ sphinx-build -b doctest source build # run tests manually

To build the documentation into a website in the `doc/build` directory, run from the `/doc` directory:

```bash
-$ make html
-or
+$ ./release-docs.sh # convenience script
+
+# or
+
$ sphinx-build source build # run manually
$ sphinx-build -aE source build # rebuild all files (no cache)
```

@@ -48,14 +52,16 @@ To build a pdf from the documentation, you need to install a latex distribution

```bash
$ make latex # build the docs into a tex format in a latex directory
-or
-$ sphinx-build -b latex source latex # run sphinx manually to build lated b
+
+# or
+
+$ sphinx-build -b latex source latex # run sphinx manually to build latex
```

Enter the build directory and convert tex file to pdf:

```bash
-$ cd build           # go into the latex directory
+$ cd build            # go into the latex directory
$ pdflatex podpac.tex # build the pdf from podpac.tex entry file
```

@@ -63,15 +69,17 @@ $ pdflatex podpac.tex # build the pdf from podpac.tex entry file

To live-serve the documentation as a website during development, you will need to add one more python package [`sphinx-autobuild`](https://github.com/GaretJax/sphinx-autobuild):

-```
+```bash
$ pip install sphinx-autobuild
```

Then run from the `doc` directory:

-```
-$ ./serve-docs.sh # convience script
-or
+```bash
+$ ./serve-docs.sh # convenience script
+
+# or
+
$ sphinx-autobuild source build # run manually
$ sphinx-autobuild -aE source build # rebuild all files (no cache)
```

@@ -86,10 +94,8 @@ To stop the server simply press `^C`.
    + `/source/_templates` - templates to use for styling pages
    + `/source/_static` - static files that need to be copied over to distributed documentation (i.e. images, source code, etc)
    + `/source/conf.py` - sphinx configuration file
-    + `/source/index.rst` - root documentation file. Includes TOC
-    + `/source/user/api/` - auto generated API documentation using `sphinx-autogen`
-    + `/source/user/api-min/` - auto generated minimal API documentation using `sphinx-autogen`
-    + ... add others as generated ...
+ + `/source/index.rst` - root documentation file + + `/source/api/` - auto generated API documentation using `sphinx-autogen` - `/build` - generated documentation files ## References diff --git a/doc/source/earthdata.md b/doc/source/earthdata.md index 197095d71..e338864a5 100644 --- a/doc/source/earthdata.md +++ b/doc/source/earthdata.md @@ -3,6 +3,7 @@ This document describes using an Earth Data Account with PODPAC. ## Motivation + * An Earth Data Login account is needed to access the wealth of data provided by NASA. * PODPAC automatically retrieves this data using OpenDAP. @@ -11,10 +12,11 @@ NASA. * save their credentials as part of a PODPAC settings files * provide their username and password to the child class of a PyDAP node at runtime -## Creating an EarthData Login Account -* Go to the [EarthData Registration Page](https://urs.earthdata.nasa.gov/users/new) +## Creating an Earthdata Login Account + +* Go to the [Earthdata Registration Page](https://urs.earthdata.nasa.gov/users/new) page and follow the instructions to register an account -* Go to the [EarthData Login Page](https://urs.earthdata.nasa.gov/) to log into +* Go to the [Earthdata Login Page](https://urs.earthdata.nasa.gov/) to log into your account * To enable OpenDAP access: * Go to your [Profile](https://urs.earthdata.nasa.gov/profile) once logged in. @@ -23,42 +25,37 @@ NASA. * Find the `NASA GESDICS DATA ARCHIVE`, `NSIDC V0 OPeNDAP`, and `NSIDC_DATAPOOL_OPS` applications * Additional applications may be required to access datasets of interest * For each, click the `APPROVE` button -* At this stage, your EarthData account should be set up and ready to use for +* At this stage, your Earthdata account should be set up and ready to use for accessing data through OpenDAP -## Saving EarthData Credentials in PODPAC Settings -For convenience, PODPAC can store your EarthData login details so that you do -not have to provide your username and password at run time or in every script. +## Using Earthdata Credentials in PODPAC + + +PODPAC uses Earthdata credentials to access the SMAP data source nodes. +You can store the credentials for SMAP nodes using the `Node` method `set_credentials`. -> **NOTE:** PODPAC stores your credentials in a plain text file. If this is -a security issue for you, do not use this method. +To store credentials for SMAP nodes, use the following code in an interactive Python session: -To store your credentials use the following code in an interactive Python session: ```python -from podpac.core.authentication import EarthDataSession -eds = EarthDataSession() -eds.update_login() +from podpac.datalib import SMAP + +node = SMAP() +node.set_credentials(username="", password="") ``` -Then follow the on-screen prompts to enter our username and password. -Your credentials will be saved in `$HOME\.podpac\settings.json` -where `$HOME` is usually `C:\users\` on Windows and `/home/` -on Linux systems. +The `set_credentials` method stores credentials for a Node in the PODPAC settings. +To persistently save the credentials in the PODPAC settings +(to avoid running `set_credentials` at runtime or in a script), run `settings.save()`: -## Setting credentials at Runtime -To set credentials at runtime, you can either provide an authenticated session -or the username and password to the PyDAP node or child node. For example +> **NOTE:** PODPAC stores credentials in plain text. +> Be conscious of outputting the PODPAC settings to a file when it contains credentials. 
-```python -from podpac.authentication import EarthDataSession -eds = EarthDataSession(username=, password=) -from podpac.data import PyDAP -pydap_node = PyDAP(source=, auth_session=eds) ``` +from podpac import settings -Or - -```python -from podpac.data import PyDAP -pydap_node = PyDAP(source=, username=, password=) +settings.save() ``` + +Your credentials will be saved in `$HOME\.podpac\settings.json` +where `$HOME` is usually `C:\users\` on Windows and `/home/` +on Linux systems. diff --git a/doc/source/examples.rst b/doc/source/examples.rst index 489bcc532..f8d4b3147 100644 --- a/doc/source/examples.rst +++ b/doc/source/examples.rst @@ -72,8 +72,8 @@ Note the order of the ``dims`` keyword must match the shape of our data. In [8]: coords Out[8]: coords Coordinates (EPSG:4326) - lat: ArrayCoordinates1d(lat): Bounds[40.0, 50.0], N[11], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[-10.0, 10.0], N[21], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[40.0, 50.0], N[11] + lon: ArrayCoordinates1d(lon): Bounds[-10.0, 10.0], N[21] Create a PODPAC ``Array`` Node from ``data`` and ``coords``. An ``Array`` Node is a sub-class of ``DataSource`` Node. @@ -81,7 +81,7 @@ An ``Array`` Node is a sub-class of ``DataSource`` Node. .. code:: python # create node for data source - In [9]: node = podpac.data.Array(source=data, native_coordinates=coords) + In [9]: node = podpac.data.Array(source=data, coordinates=coords) In [10]: node Out[10]: Array DataSource @@ -91,9 +91,9 @@ An ``Array`` Node is a sub-class of ``DataSource`` Node. 0.11195743 0.58360194 0.15225759 0.99246553 0.31122967 0.80974094 0.00474486 0.00650152 0.08999056] ...]] - native_coordinates: - lat: ArrayCoordinates1d(lat): Bounds[40.0, 50.0], N[11], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[-10.0, 10.0], N[21], ctype['midpoint'] + coordinates: + lat: ArrayCoordinates1d(lat): Bounds[40.0, 50.0], N[11] + lon: ArrayCoordinates1d(lon): Bounds[-10.0, 10.0], N[21] interpolation: nearest We've successfully created a ``DataSource`` Node that describes an 11 x 21 grid of data values with lat and lon ``Coordinates``. @@ -140,7 +140,7 @@ configured. output = node.eval(coords) -`Notebooks `__ +Notebooks --------- Interactive PODPAC examples are distributed as `example Jupyter diff --git a/doc/source/install.md b/doc/source/install.md index 3d569c1a6..baa5e99e7 100644 --- a/doc/source/install.md +++ b/doc/source/install.md @@ -5,9 +5,9 @@ PODPAC is available for Windows, Mac, and Linux. 
Select the installation method the best suits your development environment: - [pip](#install-with-pip): Recommended for most users -- [Docker](#install-with-docker): For use in containers +- [Docker](#docker): For use in containers - [Install from source](#install-from-source): For development -- [Standalone distribution](#standalone-distribution): Includes Python and all dependencies +- [Standalone distribution](#standalone-distibution): Includes Python and all dependencies ## Install with pip @@ -15,15 +15,14 @@ Select the installation method the best suits your development environment: Confirm you have the required dependencies installed on your computer: -- [Python](https://www.python.org/) (3.6 or later) - - We recommend the [Anaconda Python Distribution](https://www.anaconda.com/distribution/#download-section) +- [Python](https://www.python.org/) (3.6 or later) — [Anaconda Python Distribution](https://www.anaconda.com/distribution/#download-section) recommended - See [operating system requirements](dependencies.html#os-specific-requirements) ### Environment If using Anaconda Python, create a PODPAC dedicated Anconda environment: -```bash +``` # create environment with all `anaconda` packages $ conda create -n podpac python=3 anaconda @@ -33,7 +32,7 @@ $ conda activate podpac If using a non-Anaconda Python distribution, create a PODPAC dedicated virtual environment: -```bash +``` # create environment in $ python3 -m venv @@ -45,7 +44,7 @@ $ source /bin/activate After activating the virtual environment, install using `pip` with one of the following commands: -```bash +``` $ pip install podpac # base installation $ pip install podpac[datatype] # install podpac and optional data handling dependencies $ pip install podpac[notebook] # install podpac and optional notebook dependencies @@ -70,20 +69,20 @@ Once you have Docker installed, the following steps will allow you to run PODPAC - Download the [PODPAC Dockerfile](https://github.com/creare-com/podpac/blob/master/Dockerfile) from the repository - From the directory where you downloaded the `Dockerfile`, run: -```bash +``` # build the docker image with the tag `podpac` $ docker build -t podpac . ``` - Run the built image -```bash +``` # run the docker image in an interactive shell $ docker run -i -t podpac ``` -## Standalone Distibution +## Standalone Windows Distibution ### Windows 10 @@ -94,7 +93,6 @@ The Window 10 standalone distribution requires no pre-installed operating system - For older versions, substitute `latest` in the url with the version number, i.e. `PODPAC_1.2.0_install_windows10.zip` - Once downloaded, extract the zip file into a folder on your machine. - We recommend expanding it near the root of your drive (e.g. `C:\PODPAC`) due to long file paths that are part of the installation. - - We also recommend *unblocking* the file to speed up the unzipping process. See this [Microsoft Developer Entry](https://blogs.msdn.microsoft.com/delay/p/unblockingdownloadedfile/). Once the folder is unzipped: @@ -106,6 +104,7 @@ Once the folder is unzipped: - This will open up a Windows command prompt, and launch a JupyterLab notebook in your default web browser - To close the notebook, close the browser tab, and close the Windows console +To make this standalone distribution, see the [deploy notes](deploy-notes.md). 
## Install from Source @@ -122,7 +121,7 @@ Confirm you have the required dependencies installed on your computer: If using Anaconda Python, create a PODPAC dedicated Anconda environment: -```bash +``` # create environment with all `anaconda` packages $ conda create -n podpac python=3 anaconda @@ -132,7 +131,7 @@ $ conda activate podpac If using a non-Anaconda Python distribution, create a PODPAC dedicated virtual environment: -```bash +``` # create environment in $ python3 -m venv @@ -144,7 +143,7 @@ $ source /bin/activate After activating the virtual environment, clone the [podpac repository](https://github.com/creare-com/podpac) onto your machine: -```bash +``` $ cd $ git clone https://github.com/creare-com/podpac.git $ cd podpac @@ -155,7 +154,7 @@ By default, PODPAC clones to the `master` branch, which is the latest stable rel To use a previous release, checkout the `tags/` reference. For bleeding edge, checkout the `develop` branch. -```bash +``` $ git fetch origin # fetch all remote branches $ git checkout -b release/ tags/ # checkout specific release $ git checkout -b develop origin/develop # latest stable version @@ -163,7 +162,7 @@ $ git checkout -b develop origin/develop # latest stable version From the root of the git repository, install using `pip` with one of the following commands: -```bash +``` $ pip install . # base installation $ pip install .[datatype] # install podpac and optional data handling dependencies $ pip install .[notebook] # install podpac and optional notebook dependencies @@ -176,7 +175,7 @@ See [Optional Dependencies](dependencies.html#optional-dependencies) more inform To install PODPAC and keep installation up to date with local changes, use the option `-e` when installing: -```bash +``` $ pip install -e . # install podpac with only core dependencies $ pip install -e .[devall] # install podpac and all optional dependencies ``` @@ -191,7 +190,7 @@ If you encounter issues, we recommend trying to install [rasterio](https://raste ### UnicodeDecodeError -```bash +``` UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 13: ordinal not in range(128)* ``` @@ -200,7 +199,7 @@ See this [stack overflow answer](https://stackoverflow.com/a/49127686) for a sol ### Python.h -```bash +``` psutil/_psutil_common.c:9:10: fatal error: Python.h: No such file or directory #include ^~~~~~~~~~ diff --git a/doc/source/nodes.md b/doc/source/nodes.md index 28101de09..8d4ed5bc4 100644 --- a/doc/source/nodes.md +++ b/doc/source/nodes.md @@ -2,18 +2,29 @@ This document describes the detailed interfaces for core node types so that a user may know what to expect. It also documents some of the available nodes implemented as part of the core library. -... tbd ... (for now see the [DeveloperSpec](https://github.com/creare-com/podpac/blob/develop/doc/source/developer/specs/nodes.md)) +In PODPAC, Nodes represent the basic unit of computation. They take inputs, produce outputs, and can represent source data, intermediate results, or final output. The base `Node` class defined a common interface for all PODPAC `Nodes`. + +In particular, the base `Node` class implements: + +- Caching behaviour of `Node` outputs, and interfaces with the cache system +- Serialization and deserialization of `Nodes` using our JSON format +- Saving and loading `Node` outputs +- Creating `Node` output data structures using the `create_output_array` method. 
+- Common interfaces required and used by all subsequent nodes: + * `Node.eval(coordinates, output)` + * `Node.find_coordinates()` ## DataSource -DataSource nodes interface with remote geospatial data sources (i.e. raster images, DAP servers, numpy arrays) and define how to retrieve data from these remote sources using PODPAC coordinates. PODPAC defines common generic DataSource nodes (i.e. Array, PyDAP), but advanced users can define their own DataSource nodes by defining the methods to retrieve data (`get_data(coordinates, index)`) and the method to define the `native_coordinates` property (`get_native_coordinates()`). +DataSource nodes interface with remote geospatial data sources (i.e. raster images, DAP servers, numpy arrays) and define how to retrieve data from these remote sources using PODPAC coordinates. PODPAC defines common generic DataSource nodes (i.e. Array, PyDAP), but advanced users can define their own DataSource nodes by defining the methods to retrieve data (`get_data(coordinates, index)`) and the method to define the `coordinates` property (`get_native_coordinates()`). Key properties of DataSource nodes include: - `source`: The location of the source data. Depending on the child node this can be a filepath, numpy array, or server URL). -- `native_coordinates`: The PODPAC coordinates of the data in `source` +- `coordinates`: The PODPAC coordinates of the data in `source` - `interpolation`: Definition of the interpolation method to use with the data source. - `nan_vals`: List of values from source data that should be interpreted as 'no data' or 'nans'. +- `boundary`: A structure defining the boundary of each data point in the data source (for example to define a point, area, or arbitrary polygon) To evaluate data at arbitrary PODPAC coordinates, users can input `coordinates` to the eval method of the DataSource node. The DataSource `eval` process consists of the following steps: @@ -31,13 +42,13 @@ The DataSource `interpolation` property defines how to handle interpolation of c Definition of the interpolation method on a DataSource node may either be a string: ```python -node.interpolation = 'nearest' # nearest neighbor interpolation +interpolation = 'nearest' # nearest neighbor interpolation ``` or a dictionary that supplies extra parameters: ```python -node.interpolation = { +interpolation = { 'method': 'nearest', 'params': { 'spatial_tolerance': 1.1 @@ -45,24 +56,61 @@ node.interpolation = { } ``` -For the most advanced users, the interpolation definition supports defining different interpolation methods for different dimensions: +For the most advanced users, the interpolation definition supports defining different interpolation methods for different dimensions (as of 2.0.0 this functionality is not fully implemented): ```python -node.interpolation = { - ('lat', 'lon'): 'bilinear', - 'time': 'nearest' -} +interpolation = [ + { + 'method': 'bilinear', + 'dims': ['lat', 'lon'] + }, + { + 'method': 'nearest', + 'dims': ['time'] + } +] ``` When a DataSource node is created, the interpolation manager selects a list of applicable `Interpolator` classes to apply to each set of defined dimensions. When a DataSource node is being evaluated, the interpolation manager chooses the first interpolator that is capable of handling the dimensions defined in the requested coordinates and the native coordinates using the `can_interpolate` method. 
After selecting an interpolator for all sets of dimensions, the manager sequentially interpolates data for each set of dimensions using the `interpolate` method.

## Compositor

-... tbd ...
+`Compositor` `Nodes` are used to combine multiple data files or datasets into a single interface.
+
+The `BaseCompositor` implements:
+
+- The `find_coordinates` method
+- The `eval` method
+- The `iteroutputs` method used to iterate over all possible input data sources
+- The `select_sources(coordinates)` method to sub-select input data sources BEFORE evaluating them, as an optimization
+- The interface for the `composite(coordinates, data_arrays, result)` method. Child classes implement this method, which determines the logic for combining data sources.
+
+Beyond that, there are the following compositors:
+
+- `OrderedCompositor`
+    - This is meant to composite disparate data sources together that might have different resolutions and coverage
+    - For example, prefer a high resolution elevation model which has missing data, but fill missing values with a coarser elevation datasource
+    - In practice, we use this `Compositor` to provide a single interface for a dataset that is divided into multiple files
+    - Data sources are composited AFTER harmonization.
+- `TileCompositor`
+    - This is meant to composite a data source stored in multiple files into a single interface
+    - For example, consider an elevation data source that covers the globe and is stored in 10K different files that only cover land areas
+    - Data sources are composited BEFORE harmonization

## Algorithm

-... tbd ...
+`Algorithm` `Nodes` are the backbone of the pipeline architecture and are used to perform computations on one or many data sources or the user-requested coordinates.
+
+The `BaseAlgorithm`, `Algorithm` (for multiple input nodes) and `UnaryAlgorithm` (for single input nodes) `Nodes` implement the basic functionality:
+
+- The `find_coordinates` method
+- The `Algorithm.eval` method for multiple input `Nodes`
+- The `inputs` property that finds any PODPAC `Node` defined as part of the class definition
+- The interface for the `algorithm(inputs)` method, which is used to implement the actual algorithm
+
+Based on this basic interface, PODPAC implements algorithms that manipulate coordinates, perform signal processing (e.g. convolutions), compute statistics (e.g. Mean), and support completely generic, user-defined algorithms.
+
+In particular, the `Arithmetic` node allows users to specify an `eqn`, which enables nearly arbitrary point-wise computations. Also, the `Generic` algorithm allows users to specify arbitrary Python code, as long as the `output` variable is set.

## Extending Podpac with Custom Nodes

@@ -70,7 +118,7 @@ In addition to the core data sources and algorithms, you may need to write your

### Example

-An example of creating a simple array-based datasource can be found in the [array-data-source](https://github.com/creare-com/podpac/blob/master/doc/notebooks/array-data-source.ipynb) notebook.
+An example of creating a simple array-based datasource can be found in the [array-data-source](https://github.com/creare-com/podpac-examples/blob/master/notebooks/4-advanced/create-data-source.ipynb) notebook.

### Tagging attributes

@@ -122,9 +170,9 @@ Individual node definition specify the node class along with its inputs and attr

Additional properties and examples for each of the core node types are provided below.
-## DataSource +### DataSource -### Sample +#### Sample ``` { @@ -137,13 +185,13 @@ Additional properties and examples for each of the core node types are provided } ``` -## Compositor +### Compositor -### Additional Properties +#### Additional Properties * `sources`: nodes to composite *(list, required)* -### Sample +#### Sample ``` { @@ -158,12 +206,12 @@ Additional properties and examples for each of the core node types are provided } ``` -## Algorithm +### Algorithm -### Additional Properties +#### Additional Properties * `inputs`: node inputs to the algorithm. *(object, required)* -### Sample +#### Sample ``` { @@ -189,7 +237,7 @@ Additional properties and examples for each of the core node types are provided } ``` -## Notes +### Notes * The `node` path should include the submodule path and the node class. The submodule path is omitted for top-level classes. For example: - `"node": "datalib.smap.SMAP"` is equivalent to `from podpac.datalib.smap import SMAP`. diff --git a/doc/source/overview.md b/doc/source/overview.md index fa49b8927..4022f125d 100644 --- a/doc/source/overview.md +++ b/doc/source/overview.md @@ -87,8 +87,8 @@ import podpac nodeA = podpac.data.Rasterio(source="elevation.tif", interpolation="cubic") nodeB = podpac.datalib.TerrainTiles(tile_format='geotiff', zoom=8) -# take the mean of the two data sources -alg_node = podpac.algorithm.Arithmetic(A=nodeA, B=nodeB, eqn='(A * B) / 2') +# average the two data sources together point-wise +alg_node = podpac.algorithm.Arithmetic(A=nodeA, B=nodeB, eqn='(A + B) / 2') ``` Evaluate pipelines at arbitrary PODPAC coordinates. diff --git a/doc/source/pipelines.md b/doc/source/pipelines.md deleted file mode 100644 index d4c8dd92f..000000000 --- a/doc/source/pipelines.md +++ /dev/null @@ -1,275 +0,0 @@ -# Pipelines - -*DEPRECATED: This functionality has been integrated into the Node class. Pipelines will be removed in popdac 2* - -## Introduction -A podpac pipeline can be defined using JSON. The pipeline definition describes the *nodes* used in the pipeline and the *output* for the pipeline. - -### Attributes - - * `nodes`: node definitions *(object, required)* - * `output`: output definition *(object, optional)* - -### Sample - -``` -{ - "nodes": { - "myNode": { ... }, - "myOtherNode": { ... } - ... - "myResult": { ... } - }, - "output": { - "node": "myResult", - "mode": "file", - ... - } -} -``` - -## Node definitions - -A node definition defines the node and its inputs, attributes, and default execution parameters. It also names the node so that it can be used as an input to other nodes in the pipeline. Nodes must be defined before they are referenced in a later node. - -The podpac core library includes three basic types of nodes: *DataSource*, *Compositor*, and *Algorithm*. A *Pipeline* node can also be used an an input to a pipeline. These nodes and their additional attributes are described below. - -### Common Attributes - - * `node`: a path to the node class. The path is relative to the podpac module, unless `plugin` is defined. See Notes. *(string, required)* - * `plugin`: a path to a plugin module to use (prepended node path). See Notes. *(string, optional)* - * `attrs`: set attributes in the node for custom behavior. Each value can be a number, string, boolean, dictionary, or list. 
*(object, optional)* - -## DataSource - -### Sample - -``` -{ - "nodes": { - "sm": { - "node": "algorithm.CoordData", - "attrs": { - "coord_name": "time" - } - } - } -} -``` - -## Compositor - -### Additional Attributes - - * `sources`: nodes to composite *(list, required)* - -### Sample - -``` -{ - "nodes": { - "SourceA": { ... }, - "SourceB": { ... }, - "SourceC": { ... }, - - MyCompositor": { - "node": "OrderedCompositor", - "sources": ["SourceA", "SourceB", "SourceC"] - } - } -} -``` - -## Algorithm - -### Additional Attributes - * `inputs`: node inputs to the algorithm. *(object, required)* - -### Sample - -``` -{ - "nodes": { - "MyNode": { ... }, - "MyOtherNode": { ... }, - "MyThirdNode": { ... }, - - "downscaled_sm": { - "node": "Arithmetic", - "inputs": { - "A": "MyNode", - "B": "MyOtherNode", - "C": "MyThirdNode" - }, - "attrs": { - "eqn": "A + {tsmtr} / {kappa} * (B - C)", - "params": { - "kappa": "13", - "tsmtr": "0.3" - } - } - } - } -} -``` - -## Pipeline - -### Additional Attributes - * `path`: path to another pipeline JSON definition. *(string, required)* - -### Sample - -``` -{ - "nodes": { - "MyDataSource": { - ... - }, - - "MyOtherPipeline": { - "path": "path to pipeline" - }, - - "result": { - "node": "Arithmetic", - "inputs": { - "A": "MyDataSource", - "B": "MyOtherPipeline", - }, - "attrs": { - "eqn": "A + B" - } - } - } -} -``` - -### Notes - - * The `node` path should include the submodule path and the node class. The submodule path is omitted for top-level classes. For example: - - `"node": "datalib.smap.SMAP"` is equivalent to `from podpac.datalib.smap import SMAP`. - - `"node": "compositor.OrderedCompositor"` is equivalent to `from podpac.compositor import OrderedCompositor`. - * The `plugin` path replaces 'podpac' in the full node path. For example - - `"plugin": "path.to.myplugin", "node": "mymodule.MyCustomNode"` is equivalent to `from path.to.myplugin.mymodule import MyCustomNode`. - - `"plugin": "myplugin", "node": "MyCustomNode"` is equivalent to `from myplugin import MyCustomNode` - -## Output Definition - -The output definition defines the node to output and, optionally, an additional output mode along with associated parameters. If an output definition is not supplied, the last defined node is used. - -Podpac provides several builtin output types, *file* and *image*. You can also define custom outputs in a plugins. - -### Common Attributes - - * `node`: The nodes to output. *(list, required)* - * `mode`: For builtin outputs, options are 'none' (default), 'file', 'image'. *(string, optional)* - -## None (default) - -No additional output. The output will be returned from the `Pipeline.execute` method. - -## Files - -Nodes can be output to file in a variety of formats. - -### Additional Attributes - - * `format`: file format, options are 'pickle' (default), 'geotif', 'png'. *(string, optional)* - * `outdir`: destination path for the output file *(string, required)* - -### Sample - -``` -{ - "nodes": { - "MyNode1": { ... }, - "MyNode2": { ... } - }, - - "output": { - "nodes": "MyNode2", - "mode": "file", - "format": "png", - "outdir": "C:\Path\To\OutputData" - } -} -``` - -## Images - -Nodes can be output to a png image (in memory). - -### Additional Attributes - - * `format`: image format, options are 'png' (default). *(string, optional)* - * `vmin`: min value for the colormap *(number, optional)* - * `vmax`: max value for the colormap *(number, optional)* - -### Sample - -``` -{ - "nodes": { - "MyNode1": { ... }, - "MyNode2": { ... 
} - }, - - "output": { - "nodes": "MyNode2", - "mode": "image", - "format": "png", - "vmin": 0.1, - "vmax": 0.35 - } -} -``` - -## Custom Outputs - -Custom outputs can be defined in a plugin by subclassing the `Output` base class found in `core.pipeline.output`. Custom -outputs must define the `write` method with no arguments, and may define additional parameters. - -### Attributes - -Replace the 'mode' parameter with a plugin path and output class name: - - * `plugin`: path to a plugin module to use *(string, required)* - * `output`: output class name *(string, required)* - -### Sample Custom Output Class - -File: **my_plugin/outputs.py** - -``` -import numpy as np -import traitlets as tl -import podpac - -class NpyOutput(podpac.core.pipeline.output.Output): - path = tl.String() - allow_pickle = tl.Bool(True) - fix_imports = tl.Bool(True) - - def write(self): - numpy.save(self.path, self.node.output.data, allow_pickle=self.allow_pickle, fix_imports=self.fix_imports) -``` - -### Sample Pipeline - -``` -{ - "nodes": { - "MyNode1": { ... }, - "MyNode2": { ... } - }, - - "output": { - "nodes": "MyNode2", - "plugin": "my_plugin", - "output": "NpyOutput", - "path": "my_pipeline_output.npy", - "allow_pickle": false - } -} -``` diff --git a/doc/source/settings.md b/doc/source/settings.md index c6e56a67a..8d59d31a4 100644 --- a/doc/source/settings.md +++ b/doc/source/settings.md @@ -4,7 +4,7 @@ This tutorial describes methods for viewing and editing PODPAC settings used to To follow along, open a Python interpreter or Jupyter notebook in the Python environment where PODPAC is installed. -``` +```bash # activate the PODPAC environment, using anaconda $ conda activate podpac @@ -30,7 +30,7 @@ The settings are stored in a dictionary format, accessible in the interpreter: In [2]: settings Out[2]: {'DEBUG': False, - 'ROOT_PATH': 'C:\\Users\\user\\.podpac', + 'ROOT_PATH': 'C:\\Users\\user\\.config\\podpac', 'AUTOSAVE_SETTINGS': False, ... } @@ -42,7 +42,7 @@ To view the default settings, view `settings.defaults`: In [3]: settings.defaults Out[3]: {'DEBUG': False, - 'ROOT_PATH': 'C:\\Users\\user\\.podpac', + 'ROOT_PATH': 'C:\\Users\\user\\.config\\podpac', 'AUTOSAVE_SETTINGS': False, ... } @@ -145,7 +145,7 @@ To see the PODPAC root directory, view `settings["ROOT_PATH"]`: In [1]: from podpac import settings In [2]: settings["ROOT_PATH"] -Out[5]: 'C:\\Users\\user\\.podpac' +Out[5]: 'C:\\Users\\user\\.config\\podpac' ``` Edit the `settings.json` file in the `"ROOT_PATH"` location, then open a new interpreter and load the `podpac.settings` module to see the overwritten values: @@ -164,11 +164,11 @@ Out[2]: 1000000000.0 ``` If a `settings.json` files exist in multiple places, PODPAC will load settings in the following order, -overwriting previously loaded settings in the process: +overwriting previously loaded settings (lower numbered items) in the process: -* podpac default settings -* home directory settings (`~/.podpac/settings.json`) -* current working directory settings (`./settings.json`) +1. podpac default settings +2. home directory settings (`~/.config/podpac/settings.json`) +3. current working directory settings (`./settings.json`) The attribute `settings.settings_path` shows the path of the last loaded settings file (e.g. the active settings file). 
@@ -176,7 +176,7 @@ The attribute `settings.settings_path` shows the path of the last loaded setting In [1]: from podpac import settings In [2]: settings.settings_path -Out[2]: 'C:\\Users\\user\\.podpac' +Out[2]: 'C:\\Users\\user\\.config\\podpac' ``` A `settings.json` file can be loaded from outside the search path using the `settings.load()` method: diff --git a/doc/source/specs/caching.md b/doc/source/specs/caching.md deleted file mode 100644 index 32ee1b9a3..000000000 --- a/doc/source/specs/caching.md +++ /dev/null @@ -1,327 +0,0 @@ -# Requirements -* Support storing results of calculations - * This includes: - * ? numpy arrays - * xarrays - * UnitsDataArray - * ? DataSource - * ? other serialized binary data - * GeoTIFFs -* Cached queries are idempotent with respect to some set of parameters that define the query (location/datetime, datasource, paramters used to compute a prediction). -* ability to retrieve calculated data from a pipeline after execution in an interactive shell for debugging or analysis purposes -* ? Support retrieval of subsets of data. For example, previous results of a calculation over North America at some resolution are cached. Does cache support retrieval of just the results for California at the same resolution? Or, does interpolation handle this in conjunction with cache? Or, does interpolation only handle it when the resolution doesn't match. Or does caller request data using "key" of original cache and an additional "subset" parameter? -* Support different storage mediums - * This includes: - * RAM - * local disk (HDD/SSD) - * AWS s3 - * ? Databases (MongoDB supports geospatial queries and binary data) - * ? HDFS file system -* Support resource limitations - * This includes: - * Total number of bytes cached in RAM/disk/s3 across a process running podpac - * Total number of bytes cached in RAM/disk/s3 across multiple processes on the same computer/server running podpac - * Total number of bytes cached in RAM/disk/s3 across a cluster of servers running podpac -* Support prioritization of cached data under resource limitations - * Bassed on: - * size of data (bytes) - * computational cost to reproduce (time to compute) - * access patterns: - * frequency of use - * recency of use -* Support expiration of data (e.g. forecast data that is updated at some time interval) -* Support cache tiers: - * respond from "better" tier when possible (e.g. RAM) - * fall back on "worse" tier (e.g. local disk then s3 or some other networked storage) -* Support saving "providence" of data with the stored data - * For archived data (local disk, s3, database, HDFS) this could be the json pipeline definition and should include: - * version of podpac that was used to create it - * timestamp when it was created - * information about the root datasources (version if they have it) - * computational graph definition - * For in memory we may not want to be so robust but we may want include: - * Timestamp when it was computed/stored (to support expiration) - * Possibly information about the function call that created the data (for a cached property). This maybe could be a lambda function wrapping the original function with the original args/kwargs. But would have to be careful about args/kwargs that have state that may have changed. Could we maybe force these to be "static". - -# Example Use cases -1. SMAP data retrieved from NASA servers while serving a request is stored on local disk for future requests. -2. 
Server that uses podpac to handle requests for data that involves complex computations using forecast data (e.g. weather predictions) as an input. Multiple processes are used to load-balance handling of requests. The same calculation should ideally not be performed by more than one of these processes. However, this could be subject to available RAM. In addition, results of calculations bassed on forecast data should be redone when updated forecast data becomes available. -3. Server like above, but requests are handled by a "serverless" technology like AWS lambda. Intermediate results are cached in a network storage like AWS s3 (or sharded MongoDB cluster, or HDFS filesystem). -4. TODO: Add example usecases - -# Specification - -## `CacheStore` - -Abstract class representing a specific storage medium (e.g. folder on local-disk, s3-bucket, RAM). - -### User Interface -Starting from here for examples below: -```python -import numpy as np -import traitlets as tl -from podpac.core.node import Node -from podpac.core.coordinate import Coordinate -from podpac.core.units import UnitsDataArray, ureg -from podpac.core.cache import DiskStore -node = Node() -store = DiskStore() # assuming DiskStore is a concrete class inheriting from CacheStore -``` -`node` is used in the following methods to allow access to a storage "sandbox" specific to a `Node`. `store` will use methods and properties of `node` to determine which "sandbox" to use. This will mainly be a hash of `node.definition(type='json')`. A `CachStore` representing local-disk storage could also use the class inheritence of a `node` to create a user friendly directory structure, also a `node` could provide a user friendly string to prepend to any files representing cached data for the `node`. -### Methods - -public methods - -Put data into the cache for a node: - -```python -def put(self, node, data, key, coordinates=None): - '''Cache data for specified node. - - Parameters - ------------ - node : Node - node requesting storage. - data : any - Data to cache - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - ''' -``` - -Get data from the cache: - -```python -def get(self, node, key, coordinates=None): - '''Get cached data for this node. - - Parameters - ------------ - node : Node - node requesting storage. - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - - Returns - ------- - data : any - The cached data. - - Raises - ------- - CacheError - If the data is not in the cache. - ''' -``` - -Clear the cache: - -```python -def rem(self, node=None, key=None, coordinates=None): - '''Delete cached data for this node. - - Parameters - ------------ - node : Node - node requesting storage. - key : str, optional - Delete only cached objects with this key. - coordinates : Coordinates - Delete only cached objects for these coordinates. 
- ''' -``` - - * delete the entire cache: `store.rem()` - * delete the entire cache for a node: `store.rem(node)` - * delete a specific cached object (coordinate-depedent): `store.rem(node=node, key=mykey, coordinates=coords)` - * delete a specific cached object (coordinate-independent): `store.rem(node=node, key=mykey)` - * delete all cached data for a node for specific coordinates: `store.rem(node=node, coordinates=coords)` - * delete all cached objects for a node with a given key for any coordinates: `store.rem(node=node, key=mykey)` - -Just check the cache: - -```python -def has(self, node, key, coordinates=None): - '''Check for cached data for this node - - Parameters - ------------ - node : Node - node requesting storage. - key : str - Cached object key, e.g. 'output'. - coordinates: Coordinate, optional - Coordinates for which cached object should be checked - - Returns - ------- - has_cache : bool - True if there as a cached object for this node for the given key and coordinates. - ''' -``` - -### Developer interface - -#### Public Attributes - -* `cache_types` : categories the `CacheStore` files under, e.g. `ram`,`disk`,`network`. - -## `CacheCtrl` - -### User interface -Starting from here for examples below: -```python -import numpy as np -import traitlets as tl -from podpac.core.node import Node -from podpac.core.coordinate import Coordinate -from podpac.core.units import UnitsDataArray, ureg -from podpac.core.cache import CacheCtrl -node = Node() -cache = CacheCtrl() # assuming DiskStore is a concrete class inheriting from CacheStore -``` - -Instances hold a collection of `CacheStore` objects. Could be an ordered list representing the tiers to look in, e.g. first look in RAM, then local-disk, then s3. Alternatively, `CacheStore` could hold its own fall-back (e.g. a RAM `CacheStore` falls back on a local-disk `CacheStore`) - -### Methods - -public methods - -Put data into the cache for a node: - -```python -def put(self, node, data, key, coordinates=None, mode=None): - '''Cache data for specified node. - - Parameters - ------------ - node : Node - node requesting storage. - data : any - Data to cache - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. - ''' -``` - -Get data from the cache: - -```python -def get(self, node, key, coordinates=None, mode=None): - '''Get cached data for this node. - - Parameters - ------------ - node : Node - node requesting storage. - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. - - Returns - ------- - data : any - The cached data. - - Raises - ------- - CacheError - If the data is not in the cache. - ''' -``` - -Clear the cache: - -```python -def rem(self, node=None, key=None, coordinates=None, mode=None): - '''Delete cached data for this node. - - Parameters - ------------ - node : Node - node requesting storage. 
- key : str, optional - Delete only cached objects with this key. - coordinates : Coordinates - Delete only cached objects for these coordinates. - mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. - ''' -``` - - * delete the entire cache: `cache.rem()` - * delete the entire cache for a node: `cache.rem(node)` - * delete a specific cached object (coordinate-depedent): `cache.rem(node=node, key=mykey, coordinates=coords)` - * delete a specific cached object (coordinate-independent): `cache.rem(node=node, key=mykey)` - * delete all cached data for a node for specific coordinates: `cache.rem(node=node, coordinates=coords)` - * delete all cached objects for a node with a given key for any coordinates: `cache.rem(node=node, key=mykey)` - -Just check the cache: - -```python -def has(self, node, key, coordinates=None, mode=None): - '''Check for cached data for this node - - Parameters - ------------ - node : Node - node requesting storage. - key : str - Cached object key, e.g. 'output'. - coordinates: Coordinate, optional - Coordinates for which cached object should be checked - mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. - - Returns - ------- - has_cache : bool - True if there as a cached object for this node for the given key and coordinates. - ''' -``` - -### Developer interface - -#### Private attributes - -* `_cache_mode` : Override for what types of the `CacheStore` are affected: 'ram','disk','network','all'. If this is not `None` then `node._cache_mode` and the `mode` argument for the `CacheStore` public methods will be ignored. - - -# Implementation Notes -* Idempotence can be supported using key computed via some hashing protocol. Maybe the key is the hash of a dictionary representing parameter key/values and there is some specification on the keys like the time parameter is always "datetime". -* Keys used to retrieve data need to support a distributed system. Basically, cache system needs to know, or be able to figure out, what server to make a request to when data is cached over some kind of network storage system (multiple s3 buckets, sharded database, HDFS). - * This can be accomplished by: - * reserving a portion of a retrieval key (hash) to specify the network resource - * could be hard to change the "key" for a particular network resource after the fact. - * likely need to support adding (maybe deleting) network resources over time. - * Using a centralized lookup server - * central server could be bottleneck - * retrieval becomes at least two network bassed queries (one to central server, and then one for data). -* libraries to keep in mind: - * [pathos](https://github.com/uqfoundation/pathos) Graph Execution Manager with generic `map` (think python map) and `pipe` (aka `apply`, not sure what this is maybe reduction) operations. - * execution manager versions (implementations) include: - * MPI - * ssh (rpc over ssh tunnels) - * multiprocessors - * [klepto](https://github.com/uqfoundation/klepto) In-memory and archival cache. Works-with/used-by [pathos](https://github.com/uqfoundation/pathos) in conjunction with [`dill`](https://github.com/uqfoundation/dill) (serializer that extends pickle). - * Supported caching algorithms: `lfu`,`lru`,`mru`,`rr`. 
Does not have the something that takes into account "compute time". - * Supported archival systems: file, directory,sql-table,sql-database,directory of hdf5 files, single hdf5 file - * Supported key calculations: raw-python objects (obj), hash of obj, str(obj), pickle of obj - * [Dask](https://dask.pydata.org/en/latest/) Execution manager. Dataframe style computations. - * [cachey](https://github.com/dask/cachey) Cache that works with Dask. In-memory only. Simple dictionary style interface. Uses formula to compute priority of data abssed on size, use, and time to compute. - * key/value memory caching servises from the database-driven website community: - * [memcached](http://memcached.org/) ([wikipedia](https://en.wikipedia.org/wiki/Memcached)) - * [redis](https://redis.io/) ([wikipedia](https://en.wikipedia.org/wiki/Redis)) - * [geohash](https://en.wikipedia.org/wiki/Geohash) - * [Thespian](https://github.com/kquick/Thespian) Python Actor package. Supports several underlying communication systems, including TCP and UDP network bassed communication for multiprocess, either single or multi-computer, systems. Communication is restricted to objects that can be pickled and there is no shared memory. Looks like a mature but not neglected package, and the Actor model of parallel/concurrent execution can be pretty easy to reason about. diff --git a/doc/source/specs/coordinates.md b/doc/source/specs/coordinates.md deleted file mode 100644 index e51164b4c..000000000 --- a/doc/source/specs/coordinates.md +++ /dev/null @@ -1,371 +0,0 @@ -# Requirements -* Support lat, lon, alt, and time dimensions -* Support arbitrary stacking of coordinates (eg. lat_lon lat_time, lat_lon_time, alt_time, etc.) -* Support dimensions for coordinates, but allow overwriting to ignore dimensions -* Support different coordinate reference systems for lat, lon dimensions -* Support arbitrary start, end, steps that get filled with the native coordinates when requested -* Support intersection of coodinates -* Support addition of coordinates -* Support dropping coordintes -* Support replacement of coordinates for certain dimensions -* Support sub-selection of coordinates based on index -* Support multiple types of calculated, and explicitly labelled coordinates - * This includes: - * Explicit lists of coordinates - * Uniform or linspace coordinates - * Rotated coordinates - * Mapped coordinates (mapping based on i/j(/k/l) index of data array) - -# Example Use cases -1. I want to create a regularly spaced set of (lat, lon) coordinates -2. I want to create an irregularly spaced set of (time) coordinates -3. I want to create a rotated lat,lon coordinate system -4. I want to create a regularly spaced line of (lat_lon) coordinates -5. I have a set of native_coordinates, and I want to sub-select based on a window of coordinates. The step size of the native_coordinates should be preserved -6. I have a set of native_coordinates, but I only want every 5th element in the time dimension -7. I have a set of native_coordinates, but I want to replace the coordinates of the time dimension with a different coordinate -8. I have a set of (lat_lon) stacked coordinates but I want a new set of coordinates that describe a box containing all of these (lat_lon) coordinates -- this should be at the resolution of the lat_lon coordinates -9. I want to create (lat,lon) coordinates in UTM-feet using zone T18 with the NAD87 CRS -10. 
I want the intersection of the (lat,lon) coordinates in UTM-feet using zone T18 with the NAD87 CRS with another coordinate system using CRS WGS84 in geodetic coordinates. -11. I want to specify a single (lat, lon) coordinate represented as a single point. Intersections with another coordinate should only give results at the point. -12. I want to specify a single (lat, lon) coordinate representing an area of a certiain size (dlat, dlon). Intersections with another coordinate will give resoluts over this area. -13. TODO: - -# Specification - -## General - - * Coordinate values are either `float` or `np.datetime64` - * Coordinate deltas are either `float` or `np.timedelta64` - * When input: - - numerical values and deltas are cast to `float` - - string values are parsed with `np.datetime64` - - string deltas are split and then parsed by `np.timedelta64` (e.g. `'1,D'` -> `np.timedelta(1, 'D')`) - -## Coordinates for One Dimension - -### BaseCoordinates1d - -`BaseCoordinates1d` is the base class for Coordinates1d and StackedCoordinates. - -Common Attributes: - - `name` - - `dims` - - `size` - - `is_monotonic` - - `is_uniform` - -Common Methods: - - `select(bounds, outer=False)` - - `intersect(other, outer=False)` - -Common Operators: - - `len` - - `[]`: supports integer, slice, index array, or boolean array - -### Coordinates1d(BaseCoordinates1d) - -Base class for a singe dimension of coordinates. - -Common Traits: -- `name`: Enum('lat', 'lon', 'time', 'alt') -- `units`: Units -- `coord_ref_sys`: string, default: 'WGS84' -- `ctype`: Enum('point', 'left', 'rigth', 'midpoint'), default: 'midpoint' -- `extents`: array, shape (2,), optional - -Common Properties -- `coordinates`: read-only array -- `properties`: dictionary of coordinate properties -- `dtype`: `np.datetime64` or `float` -- `size`: int -- `bounds`: read-only array, `[float, float]`. Coordinate values min and max. -- `area_bounds`: read-only array, `[float, float]`. - - For point coordinates, this is just the `bounds`. - - For segment coordinates, use `extents` when available, otherwise calculated depending on the ctype mode. -- `is_monotonic`: bool -- `is_descending`: bool -- `is_uniform`: bool - -Common Methods - - `select(bounds, outer=False)`: select coordinates within the given bounds - - returns a new Coordinates1d - - if `outer` is true, the coordinates just outside the bounds are returned - - `intersect(other, outer=False)`: intersect these coordinates with other coordinates - - returns a new Coordinates1d - - if `other` is Coordinates1d, raises an exception if `other.name != self.name` - - if `other` is StackedCoordinates or Coordinates, intersects with `other[self.name]` - - `add`: add delta value to each coordinate value - - returns a new Coordinates1d object by default, or can be modified in-place - - `concat(other)`: concatenate additional coordinates - - returns a new Coordinates1d object by default, or can be used in-place - - raises an exception if `other.name != self.name` - - *not sure we need this...* - -Operators - - `+`, `+=`: wraps add - - `-`, `-=`: wraps add - -### ArrayCoordinates1d(Coordinates1d) - -A 1d array of coordinates. 
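A quick illustration of the intent, using the constructor and common properties listed in this section (a sketch against the interface proposed in this spec, not necessarily the shipped API; the import path is an assumption):

```python
# Sketch: ArrayCoordinates1d holds an explicit list of coordinate values
# for a single dimension and exposes the common Coordinates1d properties above.
from podpac.core.coordinates import ArrayCoordinates1d  # assumed import path

lat = ArrayCoordinates1d([45.0, 45.5, 46.0], name="lat")

lat.size          # 3
lat.is_monotonic  # True
lat.bounds        # min and max of the coordinate values: [45.0, 46.0]
lat[::2]          # slicing returns a new ArrayCoordinates1d ([45.0, 46.0])
```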
- -Constructor: - - `ArrayCoordinates1d()`: empty coordinates - - `ArrayCoordinates1d(value)`: a singleton array with one coordinate - - `ArrayCoordinates1d(values)`: an array of coordinates - -Alternate Constructors: - - `ArrayCoordinates1d.from_xarray(xcoords)`: Create coordinates from an xarray dimension (a named DataArray) - -Traits: - - `coords`: array - -### UniformCoordinates1d(Coordinates1d) - -Uniformly-spaced coordinates, parameterized by a start, stop, and step. - -Constructor - - `UniformCoordinates1d(start, stop, step)` - - `UniformCoordinates1d(start, stop, step=step)` - - `UniformCoordinates1d(start, stop, size=N)` - -Alternate Constructors - - `UniformCoordinates1d.from_tuple(items)`: items is either (start, stop, step) or (start, stop, size) - -Traits: - - start: float or datetime64 - - stop: float or datetime64 - - step: float or timedelta64 - -### StackedCoordinates(BaseCoordinates1d) - -Coordinates for two or more physical dimensions that are indexed together (aligned, as opposed to defining a grid). This class should be considered an implementation detail that behaves like a tuple (is iterable) but also facilitates a common interface with Coordinates1d by mapping indexing and other methods to its Coordinates1d objects. - -Properties -- `name`: dimension names joined by an underscore, e.g. `'lat_lon'` -- `dims`: tuple of dimension names -- `coordinates`: pandas.MultiIndex -- `size`: int -- `is_monotonic`: if all of its dimensions are monotonic -- `is_monotonic`: if all of its dimensions are uniform - -Methods - - `select(bounds)`: select coordinates within the given bounds - - returns a new StackedCoordinates object - - TODO: how are bounds defined? is this necessary - - `intersect(other)`: intersect these coordinates with other coordinates - - outer=False, intersection of intersect in each dimension - - outer=True, union of intersection in each dimension - - `concat(other)`: concatenate additional coordinates - - returns a new StackedCoordinates object by default, or can be used in-place - - raises an exception if `other.name != self.name` - - *not sure we need this...* - -## Convenience Functions - -### crange - -``` -podpac.crange(0, 2.5, 0.5) -podpac.crange('2018-01-01', '2018-01-10', '2,D') -podpac.crange(np.datetime64('2018-01-01'), np.datetime64('2018-01-10'), np.timedelta64(2, 'D')) -``` - - - Similar to np.arange, but - - contains the stop value if it falls exactly on a step - - supports time coordinates, either datetime64/timedelta64 or strings - - Under the hood, this is implemented by mapping directly to `UniformCoordinates1d(start, stop, step)` - -### clinspace - -``` -podpac.clinspace(0, 2.5, 5) -podpac.clinspace('2018-01-01', '2018-01-09', 5) -podpac.clinspace(np.datetime64('2018-01-01'), np.datetime64('2018-01-09'), 5) -podpac.clinspace([0, 1], [2.5, 20], 5) -podpac.clinspace([0, 1, '2018-01-01'], [2.5, 20, '2018-01-09'], 5) -``` - - - Similar to np.linspace, but - - supports time coordinates, either datetime64 or strings - - supports stacked coordinates - - Under the hood, this is implemented by mapping to `UniformCoordinates1d(start, stop, size=N)` and `StackedCoordinates` - -### Shorthand/aliases - -For ease-of-use, the following aliases will be available in the toplevel `podpac` package: - - - `_ca`: `ArrayCoordinates1d` - - `_cu`: `UniformCoordinates1d` - - `_stacked`: `StackedCoordinates` - -16 shortcut functions *may* also be defined, e.g. 
`_ca_lat`, `_cu_time`, etc - -So that the following single coordinates are equivalent - -``` -UniformCoordinates1d(0, 1, 0.1, name='lat') -podpac._cu(0, 1, 0.1, name='lat') -podpac._cu_lat(0, 1, 0.1) -podpac.crange(0, 1, 0.1, name='lat') - -# these are also functionally equivalent to the above -ArrayCoordinates1d(np.arange(0, 1.1, 0.1), name='lat') -podpac._ca(np.arange(0, 1.1, 0.1), name='lat') -podpac._ca_lat(np.arange(0, 1.1, 0.1)) -``` - -And the following stacked coordinates are equivalent - -``` -StackedCoordinates([ - UniformCoordinates1d(0, 1, size=100, name='lat'), - UniformCoordinates1d(0, 1, size=100, name='lon'), - UniformCoordinates1d('2018-01-01', '2018-01-10', size=10, name='time')]) - -podpac._stacked([ - podpac._cu(0, 1, size=10, name='lat'), - podpac._cu(0, 1, size=10, name='lon'), - podaac._cu('2018-01-01', '2018-01-10', size=10, name='time')]) - -podpac._stacked([ - podpac._cu_lat(0, 1, size=100), - podpac._cu_lon(0, 1, size=100)), - podpac._cu_time('2018-01-01', '2018-01-10', size=10)]) - -podpac.clinspace((0, 0, '2018-01-01'), (1, 1, '2018-01-10'), 10, dims=['lat', 'lon', 'time']) -``` - -## Multidemensional Coordinates - -### Coordinate Creation - -Coordinates are created from a list or dict containing BaseCoordinates1d objects (Coordinates1d or StackedCoordinates). - - - `Coordinates()` - - `Coordinates([coords1d, coords1d])` - - `Coordinates([StackedCoordinates([coords1d, coords1d]), coords1d])` - - `Coordinates([(coords1d, coords1d), coords1d])` - - `Coordinates([array1d, array1d], dims=['lat', 'lon'])` - - `Coordinates([(array1d, array1d), array], dims=['lat_lon', 'time'])` - - `Coordinates([array2d, array1d], dims=['lat_lon', 'time'])` - -### Alternate Constructors - - - `Coordinates.from_xarray(xcoords)`: maps multi-dimensional xarray `DataArrayCoordinates` to podpac `Coordinates` - - `Coordinates.grid(...)` - - `Coordinates.points(...)` - -### Traits - -- `coord_ref_sys`: Unicode -- `default_distance_units`: Units -- `default_ctype`: Enum('left', 'right', 'midpoint', 'point') - -### Properties - - - `dims`: tuple(str, str, ...) - - `shape`: tuple(int, int, ...) - - `ndim`: int - - `size`: int - - `udims`: tuple(str, str, ...), "unstacked" - - `coords`: `xarray.core.DataArrayCoordinates` - -### Methods - -In general, methods will return a new Coordinates object by default, with an option to modify the Coordinates in-place. - - * `keys()`: return dims, stacked - * `values()`: returns BaseCoordinates1d, stacked - * `items()`: zips keys and values - * `get(key, default=None)`: wraps [] with fallback - * `add`: TODO - * `concat(other)`: TODO - * `intersect(other, outer=False)`: maps intersection to each dimension, returns new Coordinates object - * `drop(dims)`: remove dimensions, stacked dimensions are removed together - * `udrop(dims)`: remove dimensions, stacked dimensions can be removed individually - * `transpose`: TODO - * `iterchunks`: TODO - -### Operators - - * `[dim]`: Get the BaseCoordinates1d object for the given dimension, stacked or unstacked - * `[dim] = `: Set the coordinates for this dimension. 
- - If the dimension is part of stacked dimensions, raises an exception (*we could change this to allow setting part of stacked coordinates and just validate that the size is the same*) - - If the dimension is missing, raises an exception (*we could change this to add dimensions*) - -TODO: `coords['lat_lon']['lat'] = ArrayCoordinates(...)` vs `coords[lat] = ArrayCoordinates(...)` - -### Example - -``` -lat = ArrayCoordinates1d(np.arange(10)) -lon = ArrayCoordinates1d(np.arange(10)) -time = ArrayCoordinates1d(np.range(4)) -lat_lon = StackedCoordinates(lat, lon) -coords = Coordinates([lat_lon, time]) - -coords.dims -> ('lat_lon', 'time') -coords.shape -> (10, 4) -coords.ndim -> 2 -coords.size -> 40 -coords.udims -> ('lat', 'lon', 'time') -coords.keys() -> ('lat_lon', 'time') -coords.values() -> (lat_lon, time) -coords.items() -> (('lat_lon', lat_lon), ('time', time)) -coords['lat_lon'] -> lat_lon -coords['time'] -> time -coords['lat'] -> lat -coords['alt'] -> KeyError -coords.get('alt') -> None -len(coords) -> 2 -coords.drop('time') -> Coordinates with only lat_lon -coords.drop('lat_lon') -> Coordinates with only time -coords.drop('alt') -> KeyError -coords.drop('lat') -> KeyError -coords.drop(['time', 'lat_lon']) -> empty Coordinates -coords.drop(['time', 'alt'], ignore_missing=True) -> Coordinates with only lat_lon -coords.udrop('lat') -> Coordinates with only time and lon -``` - -### Miscellaneous Examples - - -Some equivalent ways to copy: - -``` -coords_copy = Coordinates(other) # I'm not sure we need this one -coords_copy = Coordinates(other.coords1d) -coords_copy = Coordinates(other.coords1d.values()) -``` - -Select specific dimensions - -``` -dims = ['lat', 'lon'] -c1 = Coordinates([other.coords1d[dim] for dim in dims]) -``` - -Downsample (even if some dimensions are stacked) - -``` -c2 = Coordinates([c[::10] for c in other.coords1d.values()]) -``` - -Downsample only the time dimension (only works if time is not stacked) - -``` -d = other.coords1d.copy() -d['time'] = d['time'][::10] -c3 = Coordinates(d) -``` - -The safe way would would be: - -``` -d = other.coords1d.copy() -k = d.get('time') -d[k] = d[k][::10] -c3 = Coordinates(d) -``` diff --git a/doc/source/specs/data-source.md b/doc/source/specs/data-source.md deleted file mode 100644 index a407e8335..000000000 --- a/doc/source/specs/data-source.md +++ /dev/null @@ -1,165 +0,0 @@ -# Requirements - -* Quick and simple to define arbitrary data source given podpac Coordinates and some kind of data -* Provide a base class for all other data types, including user defined data types - -# Example Use cases - -* I want to load tabular data from a local file and create a podpac data source quickly -* I want to create a data source class that provides access to a new server that serves GeoTIFF data -* I want to create a data source class the provides access to a new flat file data source -* I want to access my data on Backblaze B2 instead of S3 bucket -* I want to access a GeoServer which implements WCS -* I want to provide access to a new NASA dataset that is stored on S3 in a new data format - -# Specification - -## DataSource Class - -`DataSource(Node)` is the base class from which all data other data sources are implemented. Extends the `Node` base class. - -#### Traits - -(See [Node documentation](https://podpac.org/user/api/podpac.core.node.html#podpac.core.node.Node) for nodes attributes) - -- `source`: Any, required - + The location of the source. 
Depending on the child node this can be a filepath, numpy array, or dictionary as a few examples. -- `interpolation`: - - str, tuple (str, list of podpac.core.data.interpolate.Interpolator) - - Definition of interpolation methods for each dimension of the native coordinates. If input is a string, it must match one of the interpolation shortcuts defined in - :ref:podpac.core.data.interpolate.INTERPOLATION_SHORTCUTS. The interpolation method associated - with this string will be applied to all dimensions in the native coordinates. - - If input is a tuple, the first element of the tuple must be a string interpolation method name. - The second element must be a list of :ref:podpac.core.data.interpolate.Interpolator. This list specifies - the order in which the interpolators will be applied. - The interpolation method defined by this tuple will be applied to all dimensions in the native coordinates. - - If input is a dict, the key must be string dimension names. The value can be a string or tuple following - the same convention as specified above. The string or tuple will be applied - to the dimension specied by the key. An exception will be raised if the dictionary - does not contain a key for all unstacked dimensions of the native coodrinates. - All dimension keys must be unstacked even if the underlying coordinate dimensions are stacked. - - If input is a podpac.core.data.interpolate.Interpolation, this interpolator will be used without modication. - - By default, the interpolation method is set to `'nearest'` for all dimensions. -- `coordinate_index_type`: Enum('list','numpy','xarray','pandas'). By default this is `numpy` -- `nan_vals`: List - + list of values from source data that should be interpreted as 'no data' or 'nans' (replaces `no_data_vals`) - -#### Private Members - -- `_interpolation` - tl.Instance(Interpolation). Interpolation method returned from Interpolation(`interpolation`) constructor - -*TODO* : the names of these memebers will be changed - -- `_requested_coordinates` = tl.Instance(Coordinates, allow_none=True) -- `_requested_source_coordinates` = tl.Instance(Coordinates) -- `_requested_source_coordinates_index` = tl.List() -- `_requested_source_data` = tl.Instance(UnitsDataArray) - -#### Properties - -#### Contructor - -- FUTURE: After implementing a limiter on the request size, implement: - + Take one input (i.e. `evaluate`) that will automatically execute the datasource at the native_coordinates on contruction. This will allow a shortcut when you just want to load a simple data source for processing with other more complication data sources - -#### Methods - -- `eval(coordinates, output=None, method=None)`: Evaluate this node using the supplied coordinates - + `self.requested_coordinates` gets set to the coordinates that are input - + Instantiate `_interpolation` classes with data sources based on input to `interpolation` - + remove dims that don't exist in native coordinates - + intersect the `self.requested_coordinates` with `self.native_coordinates` to create `self.requested_source_coordinates` and `self.requested_source_coordinates_index` to get requested via `get_data`. DataSource `coordinate_index_type` informs `self.requested_source_coordinates_index` (Array[int], Array[boolean], List[slices]) - + interpolate requested coordinates `self.requested_source_coordinates` using `_interpolate_requested_coordinates()`. - + `self.requested_source_coordinates` coordinates MUST exists exactly in the data source native coordinates. 
- + run `_get_data` which runs the user defined `get_data()` and check/fix order of dims when UnitsDataArray or Xarray is returned from get_data. Otherwise create UnitsDataArray using values from get_data and requested_source_coordinates. This return from `_get_data()` sets `self.requested_source_data` - + Run `_interpolate()` - + Set `self.evaluated` to True - + Output the user the UnitsDataArray passed back from interpolate -- `get_data(coordinates, coordinates_index)`: - + Raise a `NotImplementedError` - + `coordinates` and `coordinates_index` are guarenteed to exists in the datasource - + return an UnitsDataArray, numpy array, or xarray of values. this will get turned into a UnitsDataArray aftwards using `self.requested_source_coordinates` even if the xarray passes back coordinates - * Need to check/fix order of dims in UnitsDataArray and Xarray case -- `get_native_coordinates()`: return the native coordinates from the data source. By default, this should return `self.native_coordinates` if defined, otherwise raise a `NotImplementedError` -- `definition()`: Pipeline node definition for DataSource nodes. - + Transport mechanism for going to the cloud - + Leave as is - -#### Private Methods - -- `_interpolate_requested_coordinates()`: Use `self.requested_coordinates`, `self.native_coordinates`, `self.interpolate` to determine the requested coordinates interpolated into the source coordinates. - + overwrites `self.requested_source_coordinates` (Coordinates) to interpolated coordinates that need to get requested from the data source via `get_data`. - + These coordinates MUST exists exactly in the data source native coordinates - + Returns None -- `_interpolate()`: Use `self.interpolate` and call the appropriate functions in the `interpolate` module - + Returns a UnitDataArray which becomes the output of the eval method - -#### Operators - -## User Interface - -Simple datasource that doesn't need its own subclass - -```python -class ArraySource(DataSource): - source = tl.Instance(np.ndarray) - - def get_data(self, coordinates, coordinates_index): - return self.source[coordinates_index] -``` - -Using this basic class - -```python -source = np.random.rand(101, 101) -source_coordinates = coordinates(lat=(-25, 25, 101), lon=(-25, 25, 101), order=['lat', 'lon']) -node = ArraySource(source=source, native_coordinates=source_coordinates) -output = node.eval(node.native_coordinates) -``` - -FUTURE: automatically execute - -```python -source = np.random.rand(101, 101) -source_coordinates = coordinates(lat=(-25, 25, 101), lon=(-25, 25, 101), order=['lat', 'lon']) -node = ArraySource(source=source, native_coordinates=source_coordinates) -output = node.eval() -``` - -More Complicated Source. -This datasource gets new `native_coordinates` every time the source updates. 
- -```python -class RasterioSource(DataSource): - - source = tl.Unicode(allow_none=False) # specifies source MUST be a Unicode - dataset = tl.Any(allow_none=True) - band = tl.CInt(1).tag(attr=True) - - @tl.default('dataset') - def open_dataset(self, source): - return module.open(source) - - @tl.observe('source') - def _update_dataset(self): - self.dataset = self.open_dataset() - self.native_coordinates = self.get_native_coordinates() - - def get_native_coordinates(self): - dlon = self.dataset.width - dlat = self.dataset.height - left, bottom, right, top = self.dataset.bounds - - return podpac.Coordinate(lat=(top, bottom, dlat), - lon=(left, right, dlon), - order=['lat', 'lon']) - - def get_data(self, coordinates, coordinates_index): - data = self.dataset.read(coordinates_index) - return data -``` - -## Developer interface - - -TODO: Add developer interface specs diff --git a/doc/source/specs/interpolation.md b/doc/source/specs/interpolation.md deleted file mode 100644 index 49b27612b..000000000 --- a/doc/source/specs/interpolation.md +++ /dev/null @@ -1,214 +0,0 @@ -# Requirements - -- decide and run interpolate before request to `get_data` to minimize data on the wire, if possible - - handle points/segments at the edge of a data source boundary - - handle out-of-extents cases -- support most used geospatial temporal interpolation methods -- require minimal configuration for generic types of interpolation -- configuration works in python or in pipeline equally -- handle stacked and unstacked coordinates in the same pipelines -- specify priority for interpolators -- Each interpolator should know what coordinates it can interpolate to/from -- Each interpolator should know how to select appropriate coordinates from the datasource -Multiple interpolators may be required for each request: - - Time could use NN interpolation - - lat/lon could use bilinear with a specified CRS/Projection -- The order of these multiple interpolators matters from an optimization perpsective - - Consider the size of the dataset before/after interpolation - - Consider the cost of the interpolation operation -- **TODO**: support custom interpolator classes? -- **TODO**: support `+` (`__add__`) for Interpolators? - -# Example Use cases - -- user requests a single value at a point between coordinates in a datasource -- user requests an array of coordinates from a dataset with a different coordinate system -- user requests data at coordinates that overlap the extent of the native dataset -- user requests a different type of interpolation for lat/long and time - - -# User Interface - -**DataSource** usage (primary): - -```python - -# specify method -DataSource(... interpolation='nearest') - -# specify dict of methods for each dimension -# value can be a method string, or a tuple which overrides the default method InterpolationMethods -DataSource(... interpolation={ - 'lat': 'bilinear', - 'lon': 'bilinear', - 'time': ('nearest', [Nearest, Rasterio]) - }) - -# specify an interpolation class itself (useful when you need to override args to Interpolators) -DataSource(... 
interpolation=Interpolation() ) -``` - -## `Interpolation` - -Used to organize multiple interpolators across the dimensions of a DataSource - -```python -# definition generally comes from DataSource `interpolation` -# **kwargs will get passed on to interpolators -Interpolation(definition, coordinates, **kwargs) - -# simple string definition applies to all dimensions -# this string must be a member of INTERPOLATION_SHORTCUTS -Interpolation('nearest', coordinates) - -# more complicated specify a tuple with a method name and the order of Interpolators to use this method with -# the method string in the tuple does not necessarily have to be a member of INTERPOLATION_SHORTCUTS -Interpolation( ('nearest', [Rasterio, Nearest]), coordinates) - -# more complicated dict definition specifies interpolators for each dimension -Interpolation({ - ('lat', 'lon'): 'bilinear', - }, coordinates) - -# most complicated dict definition specifies tuple interpolators for dimensions -Interpolation({ - 'lat': 'nearest', - 'lon': ('nearest', [Rasterio, ...]) - 'time': ('nearest', [Nearest, ...]) - }, coordinates) - -# most complicated dict definition specifies tuple interpolators for dimensions -Interpolation({ - ('lat', 'lon'): 'nearest', - 'time': ('nearest', [Nearest, ...]) - }, coordinates) - - -# can include kwargs that get passed on to Interpolator methods -Interpolation({ - 'lat': 'nearest', - 'lon': ('nearest', [Rasterio, ...]) - 'time': ('nearest', [Nearest, ...]) - }, coordinates, tolerance=1, arg='for interpolator') -``` - - -## `Interpolator` - -Create **Interpolator** classes that can be assigned in **Interpolation** definitions. - -Examples: `NearestNeighbor`, `Rasterio`, `Scipy` - -```python - -class MyInterpolator(Interpolator): - """ - class has traits - """ - - # method = tl.Unicode() defined by Interpolator base class - tolerance = tl.Int() - kwarg = tl.Any() - - init(self): - # act on inputs after traits have been set up - - validate(self, requested_coordinates, source_coordinates): - # validate requested coordinates and source_coordinates can - # be interpolated with`self.method` - - select_coordinates(self, requested_coordinates, source_coordinates, source_coordinates_index): - # down select coordinates (if valid) based on `self.method` - - interpolate(source_coordinates, source_data, requested_coordinates, output): - # interpolate data (if valid) based on `self.method` -``` - - - - - - -# Specification - -## Constants - -- `INTERPOLATION_METHODS`: dict of shortcut: InterpolationMethod class -- `INTERPOLATION_SHORTCUTS`: List - - Only include the supported interpolation options -- `INTERPOLATION_DEFAULT`: 'nearest', interpolation method if none is specified to datasource or if dimensions are left out of the dict - -## Utility methods - -## `Interpolator` Abstract Class - -This is a traits based class since users may be expected to define Interoplator subclasses - -#### Traits - -- `method`: tl.Unicode() - current interpolation method name - - -#### Methods - -- `validate(requested_coordinates, source_coordinates)` - + check to see if the Interpolator supports the requested/soruce coordinate pair for the current method - + `InterpolationMethod` raises a `NotImplemented` if child does not overide -- `interpolate(source_coordinates, source_data, requested_coordinates, requested_data)` - + `InterpolationMethod` raises a `NotImplemented` if child does not overide -- `select_coordinates(requested_coordinates, source_coordinates, source_coordinates_idx)` - + `InterpolationMethod` raises a 
`NotImplemented` if child does not overide - -### `NearestNeighbor` - -### `NearestPreview` - -- can select coordinates - -### `Rasterio` - -### `Scipy` - - - -## `Interpolation` Class - -#### Constructor - -- `__init__(definition, coordinates, **kwargs)`: - + `definition`: (str, dict, tuple) - + `coordinates` - + kwargs will get passed through the Interpolator classes - -#### Members - -#### Private members - - -- `_definition`: dict { dim: ('method', [Interpolator]) } - - -**Cost Optimization** - -TODO: figure out where to implement optimization steps -- `cost_func`: tl.CFloat(-1) - + rough cost FLOPS/DOF to do interpolation -- `cost_setup`: tl.CFloat(-1) - + rough cost FLOPS/DOF to set up the interpolator - -#### Methods - -- `interpolate(source_coordinates, source_data, requested_coordinates, requested_data)`: run the interpolator -- `select_coordinates(requested_coordinates, source_coordinates, source_coordinates_idx)`: interpolate child coordinates -- `to_pipeline()`: export interpolator class to pipeline -- `from_pipeline()`: create interpolator class from pipeline - - -## InterpolatorException - -- custom exception for interpolation errors - - - - - diff --git a/doc/source/specs/nodes.md b/doc/source/specs/nodes.md deleted file mode 100644 index 1005717e1..000000000 --- a/doc/source/specs/nodes.md +++ /dev/null @@ -1,372 +0,0 @@ -# Requirements -* Must provide an evaluation interface with consistent rules for outputs -* Must provide a way of instantiating outputs following these rules -* Provide methods to test if these rules are obeyed for child classes -* Provide a consistent interface/rules for passing runtime parameters -* Must provide an interface for finding available coordinates within a pipeline -* Must provide an interface for caching and retrieving cached results -* Must provide methods to inspect the complete pipeline and: - * Ouput json formatted text used to reproduce it - * Report inconsistencies or inabilities to produce pipelines - * Evaluate pipelines at a point with indications of where data came from (which nodes are active within compositors) -* Must give the same output if evaluated from a newly instantiated node, or previously instantiated and evaluated node -* Must define public attributes that should be implemented/available to all nodes -* Must define public parameters that should be implemented/available to all nodes -* Potentially provide a mechanism to evaluate nodes in parallel or remotely on the cloud (This may be handled by an evaluation manager) -* Could provide a method to estimate cost of executing a job over given coordinates using AWS cloud -* Provide a standard methods for creators of child nodes to: - * Specify defaults for attributes and parameters - * Specify defaults for input nodes - * initialze the node without overwriting __init__ - -# Example Use cases -## Typical -* The primary use case is essentially: A user evaluates nodes written by various authors with the same set of coordinates and the results can interact together using common mathematical operators (e.g. 
+-/*) -* A users wants to retrieve all of the available times for a complex node pipeline in the native coordinates of the underlying datasets - * In this case, there may be multiple datasets spanning different times with different temporal resolutions -* After interactively creating a complex processing pipeline, a users wants to: - * share their work using the .json format - * evaluate the node for a larger region using cloud-resources - * inspect outputs from various stages of the algorithm to debug/analyze the results - -## Advanced -* Advanced users create new nodes to interface with custom data sources - * May specify cachining behaviour for expensive to calculate quantities such as indexes - * May specify settings that should be saved as part of the user's setting file - * Should be able to test if node is properly implemented -* Advanced users create new nodes to implement custom algorithms (see Algorithm Node spec) -* Advanced users create new nodes to composite various nodes together following custom rules (see Compositor Node spec) -* Advanced users create new nodes to construct pipelines from custom json (see Pipeline Node spec) -* Advanced users inspect results from an evaluation by examining the cached data quantities (see caching spec) - -# Specification -## User Interface -Starting from here for examples below: -```python -import numpy as np -import traitlets as tl -from podpac.core.node import Node -from podpac.core.coordinate import Coordinate -from podpac.core.units import UnitsDataArray, ureg -node = Node() -``` -### Methods -#### __init__(...) -* Any attributes set at this stage are constant for all executions - -#### eval(coordinates, output=None, method=None) -```python -def eval(coordinates, output=None, method=None): - ''' - Parameters - ----------- - coordinates : podpac.Coordinate - The set of coordinates requested by a user. The Node will be evaluated using these coordinates. - output : podpac.UnitsDataArray, optional - Default is None. Optional input array used to store the output data. When supplied, the node will not allocate its own memory for the output array. This array needs to have the correct dimensions and coordinates. - method : str, optional - Default is None. How the node will be evaluated: serial, parallel, on aws, locally, etc. Currently only local evaluation is supported. - - Returns - -------- - podpac.core.units.UnitsDataArray - - Raises - ------- - NotImplementedError: Base class raises this because the interface needs to be implemented by children. - CoordinateError: If Node contains a dimension not present in the requsted coordinates - ''' - pass -``` - -**Notes on what's returned: ** -* This function should always return a `UnitsDataArray` object. 
- ```python - >>> node.native_coordinates = Coordinate(lat=(90, -90, -1.), lon=(-180, 180, 2.), order=['lat', 'lon']) - >>> type(node.initialize_output_array()) - podpac.core.units.UnitsDataArray - ``` -* This `UnitsDataArray` may contain the following dimensions `['lat', 'lon', 'time', 'alt']` -* For cases with multiple outputs, it may additionally contain the field `band` - * This is to supported datasets such as multi-spectral imagery - ```python - >>> grey = UnitsDataArray(np.ones((2, 1)), dims=['lat', 'lon'], coords=[[0, 1], [0]]) - >>> rgba = UnitsDataArray(np.ones((2, 1, 4)), dims=['lat', 'lon', 'band'], coords=[[0, 1], [0], ['r', 'g', 'b', 'a']]) - >>> grey + rgba - - array([[[2., 2., 2., 2.]], - - [[2., 2., 2., 2.]]]) - Coordinates: - * lat (lat) int32 0 1 - * lon (lon) int32 0 - * band (band) >> grey = UnitsDataArray(np.ones((2, 1)), dims=['lat', 'lon'], - coords=[[0, 1], [0]], attrs={'units': ureg.m}) - >>> rgba1 = UnitsDataArray(np.ones((2, 1, 4)), dims=['lat', 'lon', 'band'], - coords=[[0, 1], [0], ['r', 'g', 'b', 'a']], - attrs={'units': ureg.km}) - >>> grey + rgba1 - - array([[[1001., 1001., 1001., 1001.]], - - [[1001., 1001., 1001., 1001.]]]) - Coordinates: - * lat (lat) int32 0 1 - * lon (lon) int32 0 - * band (band) >> rgba2 = UnitsDataArray(np.ones((2, 1, 4)), dims=['lat', 'lon', 'band'], - coords=[[0, 1], [0], ['r', 'g', 'b', 'a']], - attrs={'units': {'r': ureg.m, 'g': ureg.ft, - 'b': ureg.km, 'a': ureg.mile}}) - - >>> grey + rgba2 - - array([[[2., 1.3048, 1001., 1610.344]], - - [[2., 1.3048, 1001., 1610.344]]]) - Coordinates: - * lat (lat) int32 0 1 - * lon (lon) int32 0 - * band (band) , 'g': , 'b': >> node.native_coordinates = Coordinate(lat=(90, -90, -1.), lon=(-180, 180, 2.), order=['lat', 'lon']) - >>> node.evaluated_coordinates = Coordinate(lon=(-180, 180, 4.), lat=(90, -90, -2.), order=['lon', 'lat']) - >>> node.initialize_output_array().dims - ('lon', 'lat') - ``` -* If the underlying Node has unstacked dimensions not in the request, an exception is raised - * eg. Node has `['lat', 'lon', 'time']` dimensions, but coordinates only have `['time']` - ```python - >>> node.native_coordinates = Coordinate(lat=(90, -90, -1.), lon=(-180, 180, 2.), order=['lat', 'lon']) - >>> node.evaluated_coordinates = Coordinate(lon=(-180, 180, 4.),) - >>> node.initialize_output_array() - CoordinateError: 'Dimension "lat" not present in requested coordinates with dims ["lon"]' - ``` - * Because some datasets may be huge, and without information about the subset, the safest behaviour is to throw an exception -* If the underlying Node has *stacked* dimensions not in the request, raise an exception if the Node native_coordinates contains duplicates for the requested dimensions, otherwise just drop the missing dimension from the coordinates. - * eg. Node has `[lat_lon_time]` dimensions, but coordinates only have `['time'`], we can drop the lat and lon portion of the stacked coordinates as long as that doesn't result in duplicate times. Note that this doesn't change the dimensionality, but is required for correct xarray broadcasting. -* If the request has unstacked dimensions not in the Node, just return without those dimensions - * eg. 
Node has `['lat', 'lon']` dimensions, but evaluated coordinates have `['time', 'lon', 'lat']` then `UnitsDataArray` will have dimensions `['lon', 'lat']` - ```python - >>> node.native_coordinates = Coordinate(lat=45, lon=0, order=['lat', 'lon']) - >>> node.evaluated_coordinates = Coordinate(lat=45, lon=0, time='2018-01-01', order=['lat', 'lon', 'time']) - >>> node.initialize_output_array() - - array([[nan]]) - Coordinates: - * lat (lat) float64 45.0 - * lon (lon) float64 0.0 - ``` -* If the request has *stacked* dimensions not in the underlying Node, add the missing coordinates. - * eg. Node has `[lat_lon]` dimensions, but coordinates have `['lat_lon_time'`], we need to add the time portion to the stacked coordinates for correct xarray broadcasting. -* The output will contain metadata on the: - * Units - * Styling? - * Other metadata to track provenance? - -#### eval_group(group_coordinates, method=None) - -This is just a helper function that loops through the coordinates in the group calling eval. - -```python - def eval_group(self, group_coordinates, method=None): - '''Evaluate GroupCoordinates in a loop. - - Parameters - ---------- - group_coordinates : GroupCoordinates - coordinates group to evaluate - - Returns - ------- - outputs : list - list of UnitsDataArray, one for each Coordinates in the group. - ''' -``` - -#### find_coordinates -```python -def find_coordinates(dims=None, bounds=None, number=None, sortby='size', stop_types=[Compositor, DataSource]): - '''This is a helper function to get all the native coordinates within a pipeline, sorted by - the largest extents, or highest resolution - - Parameters - ----------- - dims : str/list, optional - The dimension or set of dimensions for which the user wants to find the underlying dataset coordinates. If None, all available dimensions for underlying coordinates will be found. Stacked dimensions cannot be used (or are automatically unstacked) - bounds : dict, optional - Default is None, in which case search is not restricted to a bounding window. Bound within which to search for coordinates. Format should be {'dim': (start, end)} - number : int, optional - Default is None, in which case all coordinates found are returned. Otherwise, only the first `number` of coordinates are returned, based on the `sortby` specified. If number is <0, the the last `number` of coordinates are returned. - sortby : str, optional - Default is 'size'. `sortby` should be in ['size', 'extents']. If 'size', the returned coordinates are sorted by the number of coordinates in the dimension, with the largest first. If 'extents', the returned coordinates are sorted by the largest geographical extent. In case of ties, the sorting looks at the other option to break the tie. - stop_type : list, optional - List of node types where search should be stopped. By default, searches stop at DataSource and Compositor nodes. Remove the compositor node to search for native coordinates of individual files, but this may take a long time. 
- - Returns - -------- - OrderedDict: - Format is as follows: {dims[0]: {'node_address0': Coordinate1D, - 'node_address1': Coordinate1D, ...}, - dims[1]: {'node_address2': Coordinate1D, - 'node_address0': Coordinate1D, ...}} - where find_coordinates(dims)[dims[0]].items() are sorted as specified - - Notes - ------ - The `native_coordinates` could be retrieved using (for example): - >>> c = node.find_coordinates('time') - >>> node_key = list(c['time'].keys())[0] - >>> node[node_key].native_coordinates - ''' - pass -``` - -#### public cache interface - -Put data into the cache: - -```python -def put_cache(self, data, key, coordinates=None): - '''Cache data for this node. - - Parameters - ------------ - data : any - Data to cache - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - ''' -``` - -Get data from the cache: - -```python -def get_cache(self, key, coordinates=None): - '''Get cached data for this node. - - Parameters - ------------ - key : str - Cached object key, e.g. 'output'. - coordinates : Coordinates, optional - Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output - - Returns - ------- - data : any - The cached data. - - Raises - ------- - CacheError - If the data is not in the cache. - ''' -``` - -Clear the cache: - -```python -def del_cache(self, key=None, coordinates=None): - '''Delete cached data for this node. - - Parameters - ------------ - key : str, optional - Delete only cached objects with this key. - coordinates : Coordinates - Delete only cached objects for these coordinates. - ''' -``` - - * delete the entire cache for this node: `node.del_cache()` - * delete a specific cached object (coordinate-depedent): `node.del_cache(key=mykey, coordinates=coords)` - * delete a specific cached object (coordinate-independent): `node.del_cache(key=mykey)` - * delete all cached data for this node for specific coordinates: `node.del_cache(coordinates=coords)` - * delete all cached objects for this node with a given key for any coordinates: `node.del_cache(key=mykey)` - -Just check the cache: - -```python -def has_cache(self, key, coordinates=None): - '''Check for cached data for this node - - Parameters - ------------ - key : str - Cached object key, e.g. 'output'. - coordinates: Coordinate, optional - Coordinates for which cached object should be checked - - Returns - ------- - has_cache : bool - True if there as a cached object for this node for the given key and coordinates. - ''' -``` - -### definition -```python -def definition(self, type='dict'): - ''' Returns the pipeline node definition in the desired format - - Parameters - ----------- - type: str, optional - Default is 'dict', which returns a dictionary definition of the pipeline. - 'json': returns a json-formatted text string of the pipeline - 'rich': returns a javascript widget <-- should this be 'widget' instead? - 'node': Returns a PipelineNode instance of this object - - Returns - -------- - various - Depends on type, see above. 
-    '''
-```
-* There are a few shortcut properties that call this function
-    * `pipeline_json`
-    * `pipeline_rich`
-    * `pipeline_dict`
-    * `pipeline_node`
-
-
-## Developer interface
-### Public Attributes
-* `native_coordinates`: Underlying data source coordiantes, when available
-* `units`: Units associated with the output of this node
-* `interpolation`: Interpolation method used with this node
-
-### Private Attributes
-* `_output`: last output from this node - property, looks in cache
-* `_requested_coordinates`: last coordinates used for evaluating the node
-* `_output_coordinates`: output coordinates from the last evaluation
-
-### Additional public methods
-( Some of these are already documented in the code )
-* cache_path: file where object is cached
-* cache_dir: directory where objects are cached
-* init: Used for initialization of derived classes
-* _first_init: Used to do any initialization before any of the core initialization is done
-* create_output_array
-* get_cached_object: similar go 'get', but specifies a particular filename? Maybe superceded by caching refactor?
-* save_to_disk?
-
-### Testing
-* I think testing should live in the common_test_utils. I.e. separate from this node.
diff --git a/doc/source/specs/user_stories.md b/doc/source/specs/user_stories.md
deleted file mode 100644
index e25b438a3..000000000
--- a/doc/source/specs/user_stories.md
+++ /dev/null
@@ -1,2 +0,0 @@
-TODO: Add user stories for final Year 1 Demo.
-E.g. I want to get SMAP data over a lat/lon region at the native coordinates for a particular date.
\ No newline at end of file
diff --git a/podpac/__init__.py b/podpac/__init__.py
index 483c4daf3..6943ce2b9 100644
--- a/podpac/__init__.py
+++ b/podpac/__init__.py
@@ -42,20 +42,20 @@ def makedirs(name, mode=511, exist_ok=False):
 from podpac.core.settings import settings
 from podpac.core.coordinates import Coordinates, crange, clinspace
 from podpac.core.node import Node, NodeException
-import podpac.core.authentication as authentication
-from podpac.core.utils import NodeTrait
+from podpac.core.utils import cached_property
 from podpac.core.units import ureg as units, UnitsDataArray
 
 # Organized submodules
 # These files are simply wrappers to create a curated namespace of podpac modules
 from podpac import algorithm
+from podpac import authentication
 from podpac import data
 from podpac import interpolators
 from podpac import coordinates
 from podpac import compositor
-from podpac import pipeline
 from podpac import managers
 from podpac import utils
+from podpac import style
 
 ## Developer API
 from podpac import core
diff --git a/podpac/alglib/__init__.py b/podpac/alglib/__init__.py
new file mode 100644
index 000000000..fad0d686b
--- /dev/null
+++ b/podpac/alglib/__init__.py
@@ -0,0 +1,8 @@
+"""
+Alglib Public API
+
+This module gets imported in the root __init__.py
+and exposes its contents to podpac.alglib
+"""
+
+from podpac.alglib import climatology
diff --git a/podpac/alglib/climatology.py b/podpac/alglib/climatology.py
new file mode 100644
index 000000000..99d78bc67
--- /dev/null
+++ b/podpac/alglib/climatology.py
@@ -0,0 +1,64 @@
+"""
+PODPAC node to compute beta fit of seasonal variables
+"""
+
+import logging
+import numpy as np
+import xarray as xr
+import traitlets as tl
+from lazy_import import lazy_module
+from scipy.stats import beta
+from scipy.stats._continuous_distns import FitSolverError
+
+# optional imports
+h5py = lazy_module("h5py")
+
+# Internal dependencies
+import podpac
+from podpac.core.algorithm.stats import DayOfYearWindow
+
+# 
Set up logging +_log = logging.getLogger(__name__) + + +class BetaFitDayOfYear(DayOfYearWindow): + """ + This fits a beta distribution to day of the year in the requested coordinates over a window. It returns the beta + distribution parameters 'a', and 'b' as part of the output. It may also return a number of percentiles. + + Attributes + ----------- + percentiles: list, optional + Default is []. After computing the beta distribution, optionally compute the value of the function for the given + percentiles in the list. The results will be available as an output named ['d0', 'd1',...] for each entry in + the list. + """ + + percentiles = tl.List().tag(attr=True) + rescale = tl.Bool(True).tag(attr=True) + + @property + def outputs(self): + return ["a", "b"] + ["d{}".format(i) for i in range(len(self.percentiles))] + + def function(self, data, output): + # define the fit function + try: + data[data == 1] -= 1e-6 + data[data == 0] += 1e-6 + a, b, loc, scale = beta.fit(data, floc=0, fscale=1) + except FitSolverError as e: + print(e) + return output + + # populate outputs for this point + output.loc[{"output": "a"}] = a + output.loc[{"output": "b"}] = b + for ii, d in enumerate(self.percentiles): + output.loc[{"output": "d" + str(ii)}] = beta.ppf(d, a, b) + + return output + + def rescale_outputs(self, output, scale_max, scale_min): + output[..., 2:] = (output[..., 2:] * (scale_max - scale_min)) + scale_min + return output diff --git a/podpac/algorithm.py b/podpac/algorithm.py index 3501834a5..742250143 100644 --- a/podpac/algorithm.py +++ b/podpac/algorithm.py @@ -20,6 +20,7 @@ Kurtosis, DayOfYear, GroupReduce, + ResampleReduce, ) from podpac.core.algorithm.coord_select import ExpandCoordinates, SelectCoordinates, YearSubstituteCoordinates from podpac.core.algorithm.signal import Convolution, SpatialConvolution, TimeConvolution diff --git a/podpac/authentication.py b/podpac/authentication.py new file mode 100644 index 000000000..2bef51622 --- /dev/null +++ b/podpac/authentication.py @@ -0,0 +1,8 @@ +""" +Authentication Public Module +""" + +# REMINDER: update api docs (doc/source/api.rst) to reflect changes to this file + + +from podpac.core.authentication import RequestsSessionMixin, S3Mixin diff --git a/podpac/compositor.py b/podpac/compositor.py index 418c3c5ae..166dcf323 100644 --- a/podpac/compositor.py +++ b/podpac/compositor.py @@ -4,4 +4,5 @@ # REMINDER: update api docs (doc/source/user/api.rst) to reflect changes to this file -from podpac.core.compositor import Compositor, OrderedCompositor +from podpac.core.compositor.ordered_compositor import OrderedCompositor +from podpac.core.compositor.tile_compositor import UniformTileCompositor, UniformTileMixin diff --git a/podpac/core/algorithm/algorithm.py b/podpac/core/algorithm/algorithm.py index d770ae05c..a9ed89e3d 100644 --- a/podpac/core/algorithm/algorithm.py +++ b/podpac/core/algorithm/algorithm.py @@ -29,40 +29,17 @@ class BaseAlgorithm(Node): """ @property - def _inputs(self): - # this first version is nicer, but the gettattr(self, ref) can take a - # a long time if it is has a default value or is a property - - # return = { - # ref:getattr(self, ref) - # for ref in self.trait_names() - # if isinstance(getattr(self, ref, None), Node) - # } - + def inputs(self): + # gettattr(self, ref) can take a long time, so we inspect trait.klass instead return { ref: getattr(self, ref) for ref, trait in self.traits().items() if hasattr(trait, "klass") and Node in inspect.getmro(trait.klass) and getattr(self, ref) is not None } - @property - def 
base_definition(self): - """Base node definition. - - Returns - ------- - OrderedDict - Extends base description by adding 'inputs' - """ - - d = super(BaseAlgorithm, self).base_definition - inputs = self._inputs - d["inputs"] = OrderedDict([(key, inputs[key]) for key in sorted(inputs.keys())]) - return d - def find_coordinates(self): """ - Get the available native coordinates for the inputs to the Node. + Get the available coordinates for the inputs to the Node. Returns ------- @@ -70,7 +47,7 @@ def find_coordinates(self): list of available coordinates (Coordinate objects) """ - return [c for node in self._inputs.values() for c in node.find_coordinates()] + return [c for node in self.inputs.values() for c in node.find_coordinates()] class Algorithm(BaseAlgorithm): @@ -117,7 +94,7 @@ def eval(self, coordinates, output=None): inputs = {} if settings["MULTITHREADING"]: - n_threads = thread_manager.request_n_threads(len(self._inputs)) + n_threads = thread_manager.request_n_threads(len(self.inputs)) if n_threads == 1: thread_manager.release_n_threads(n_threads) else: @@ -132,13 +109,13 @@ def f(node): pool = thread_manager.get_thread_pool(processes=n_threads) # Evaluate nodes in parallel/asynchronously - results = [pool.apply_async(f, [node]) for node in self._inputs.values()] + results = [pool.apply_async(f, [node]) for node in self.inputs.values()] # Collect the results in dictionary - for key, res in zip(self._inputs.keys(), results): + for key, res in zip(self.inputs.keys(), results): inputs[key] = res.get() - # This prevents any more tasks from being submitted to the pool, and will close the workers one done + # This prevents any more tasks from being submitted to the pool, and will close the workers once done pool.close() # Release these number of threads back to the thread pool @@ -146,7 +123,7 @@ def f(node): self._multi_threaded = True else: # Evaluate nodes in serial - for key, node in self._inputs.items(): + for key, node in self.inputs.items(): inputs[key] = node.eval(coordinates) self._multi_threaded = False @@ -159,7 +136,7 @@ def f(node): if output is None: output = result else: - output[:] = result + output[:] = result.data[:] elif isinstance(result, xr.DataArray): if output is None: output = self.create_output_array( @@ -195,7 +172,10 @@ class UnaryAlgorithm(BaseAlgorithm): Developers of new Algorithm nodes need to implement the `eval` method. """ - source = NodeTrait() + source = NodeTrait().tag(attr=True) + + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["source"] @tl.default("outputs") def _default_outputs(self): diff --git a/podpac/core/algorithm/coord_select.py b/podpac/core/algorithm/coord_select.py index db75abaad..2f5a96933 100644 --- a/podpac/core/algorithm/coord_select.py +++ b/podpac/core/algorithm/coord_select.py @@ -32,7 +32,7 @@ class ModifyCoordinates(UnaryAlgorithm): Modification parameters for given dimension. Varies by node. 
""" - coordinates_source = NodeTrait() + coordinates_source = NodeTrait().tag(attr=True) lat = tl.List().tag(attr=True) lon = tl.List().tag(attr=True) time = tl.List().tag(attr=True) @@ -67,7 +67,9 @@ def eval(self, coordinates, output=None): self._requested_coordinates = coordinates self._modified_coordinates = Coordinates( - [self.get_modified_coordinates1d(coordinates, dim) for dim in coordinates.dims], crs=coordinates.crs + [self.get_modified_coordinates1d(coordinates, dim) for dim in coordinates.dims], + crs=coordinates.crs, + validate_crs=False, ) for dim in self._modified_coordinates.udims: @@ -106,9 +108,16 @@ class ExpandCoordinates(ModifyCoordinates): Expansion parameters for the given dimension: The options are:: * [start_offset, end_offset, step] to expand uniformly around each input coordinate. * [start_offset, end_offset] to expand using the available source coordinates around each input coordinate. + + bounds_only: bool + Default is False. If True, will only expand the bounds of the overall coordinates request. Otherwise, it will + expand around EACH coordinate in the request. For example, with bounds_only == True, and an expansion of 3 + you may expand [5, 6, 8] to [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], whereas with bounds_only == False, it becomes + [[2, 5, 8], [3, 6, 9], [5, 8, 11]] (brackets added for clarity, they will be concatenated). """ substitute_eval_coords = tl.Bool(False, read_only=True) + bounds_only = tl.Bool(False).tag(attr=True) def get_modified_coordinates1d(self, coords, dim): """Returns the expanded coordinates for the requested dimension, depending on the expansion parameter for the @@ -121,7 +130,7 @@ def get_modified_coordinates1d(self, coords, dim): Returns ------- - expanded : Coordinates1d + expanded : :class:`podpac.coordinates.Coordinates1d` Expanded coordinates """ @@ -133,7 +142,7 @@ def get_modified_coordinates1d(self, coords, dim): return coords1d if len(expansion) == 2: - # use available native coordinates + # use available coordinates dstart = make_coord_delta(expansion[0]) dstop = make_coord_delta(expansion[1]) @@ -141,14 +150,30 @@ def get_modified_coordinates1d(self, coords, dim): if len(available_coordinates) != 1: raise ValueError("Cannot implicity expand coordinates; too many available coordinates") acoords = available_coordinates[0][dim] - cs = [acoords.select((add_coord(x, dstart), add_coord(x, dstop))) for x in coords1d.coordinates] + if self.bounds_only: + cs = [ + acoords.select( + add_coord(coords1d.coordinates[0], dstart), add_coord(coords1d.coordinates[-1], dstop) + ) + ] + else: + cs = [acoords.select((add_coord(x, dstart), add_coord(x, dstop))) for x in coords1d.coordinates] elif len(expansion) == 3: # use a explicit step size dstart = make_coord_delta(expansion[0]) dstop = make_coord_delta(expansion[1]) step = make_coord_delta(expansion[2]) - cs = [UniformCoordinates1d(add_coord(x, dstart), add_coord(x, dstop), step) for x in coords1d.coordinates] + if self.bounds_only: + cs = [ + UniformCoordinates1d( + add_coord(coords1d.coordinates[0], dstart), add_coord(coords1d.coordinates[-1], dstop), step + ) + ] + else: + cs = [ + UniformCoordinates1d(add_coord(x, dstart), add_coord(x, dstop), step) for x in coords1d.coordinates + ] else: raise ValueError("Invalid expansion attrs for '%s'" % dim) diff --git a/podpac/core/algorithm/generic.py b/podpac/core/algorithm/generic.py index fd1bccbd3..a29b5cd2b 100644 --- a/podpac/core/algorithm/generic.py +++ b/podpac/core/algorithm/generic.py @@ -4,6 +4,7 @@ from __future__ import division, 
unicode_literals, print_function, absolute_import +import sys import warnings import numpy as np @@ -21,15 +22,18 @@ from podpac.core.utils import NodeTrait from podpac.core.algorithm.algorithm import Algorithm +if sys.version_info.major == 2: + + class PermissionError(OSError): + pass + class GenericInputs(Algorithm): """Base class for Algorithms that accept generic named inputs.""" - inputs = tl.Dict() + inputs = tl.Dict(read_only=True) - @property - def _inputs(self): - return self.inputs + _repr_keys = ["inputs"] def _first_init(self, **kwargs): trait_names = self.trait_names() @@ -38,7 +42,14 @@ def _first_init(self, **kwargs): raise RuntimeError("Trait '%s' is reserved and cannot be used as an Generic Algorithm input" % key) input_keys = [key for key in kwargs if key not in trait_names and isinstance(kwargs[key], Node)] inputs = {key: kwargs.pop(key) for key in input_keys} - return super(GenericInputs, self)._first_init(inputs=inputs, **kwargs) + self.set_trait("inputs", inputs) + return super(GenericInputs, self)._first_init(**kwargs) + + @property + def _base_definition(self): + d = super(GenericInputs, self)._base_definition + d["inputs"] = self.inputs + return d class Arithmetic(GenericInputs): @@ -54,6 +65,8 @@ class Arithmetic(GenericInputs): eqn = tl.Unicode().tag(attr=True) params = tl.Dict().tag(attr=True) + _repr_keys = ["eqn"] + def init(self): if not settings.allow_unsafe_eval: warnings.warn( @@ -191,13 +204,15 @@ class Mask(Algorithm): """ - source = NodeTrait() - mask = NodeTrait() + source = NodeTrait().tag(attr=True) + mask = NodeTrait().tag(attr=True) masked_val = tl.Float(np.nan).tag(attr=True) bool_val = tl.Float(1).tag(attr=True) bool_op = tl.Enum(["==", "<", "<=", ">", ">="], default_value="==").tag(attr=True) in_place = tl.Bool(False).tag(attr=True) + _repr_keys = ["source", "mask"] + def algorithm(self, inputs): """ Sets the values in inputs['source'] to self.masked_val using (inputs['mask'] ) """ @@ -237,7 +252,8 @@ class Combine(GenericInputs): @tl.default("outputs") def _default_outputs(self): - return list(self.inputs.keys()) + input_keys = list(self.inputs.keys()) + return input_keys def algorithm(self, inputs): return np.stack([inputs[key] for key in self.inputs], axis=-1) diff --git a/podpac/core/algorithm/signal.py b/podpac/core/algorithm/signal.py index 7354f8176..933a462a9 100644 --- a/podpac/core/algorithm/signal.py +++ b/podpac/core/algorithm/signal.py @@ -138,12 +138,12 @@ def eval(self, coordinates, output=None): add_coord(coord.start, s_start * coord.step), add_coord(coord.stop, s_end * coord.step + 1e-07 * coord.step), coord.step, - **coord.properties, + **coord.properties ) ) exp_slice.append(slice(-s_start, -s_end)) exp_slice = tuple(exp_slice) - expanded_coordinates = Coordinates(exp_coords) + expanded_coordinates = Coordinates(exp_coords, crs=coordinates.crs, validate_crs=False) if settings["DEBUG"]: self._expanded_coordinates = expanded_coordinates diff --git a/podpac/core/algorithm/stats.py b/podpac/core/algorithm/stats.py index 86d2ca8dc..60ee65baa 100644 --- a/podpac/core/algorithm/stats.py +++ b/podpac/core/algorithm/stats.py @@ -7,6 +7,7 @@ import warnings from operator import mul from functools import reduce +import logging import xarray as xr import numpy as np @@ -14,15 +15,19 @@ import traitlets as tl from six import string_types +# Internal dependencies import podpac from podpac.core.coordinates import Coordinates from podpac.core.node import Node -from podpac.core.algorithm.algorithm import UnaryAlgorithm +from 
podpac.core.algorithm.algorithm import UnaryAlgorithm, Algorithm from podpac.core.utils import common_doc, NodeTrait from podpac.core.node import COMMON_NODE_DOC, node_eval COMMON_DOC = COMMON_NODE_DOC.copy() +# Set up logging +_log = logging.getLogger(__name__) + class Reduce(UnaryAlgorithm): """Base node for statistical algorithms @@ -45,21 +50,6 @@ def _first_init(self, **kwargs): kwargs["dims"] = [kwargs["dims"]] return super(Reduce, self)._first_init(**kwargs) - def _get_dims(self, out): - """ - Translates requested reduction dimensions. - - Parameters - ---------- - out : UnitsDataArray - The output array - - Returns - ------- - list - List of dimensions after reduction - """ - def dims_axes(self, output): """Finds the indices for the dimensions that will be reduced. This is passed to numpy. @@ -840,6 +830,8 @@ def reduce(self, x): # Time-Grouped Reduce # ============================================================================= +_REDUCE_FUNCTIONS = ["all", "any", "count", "max", "mean", "median", "min", "prod", "std", "sum", "var", "custom"] + class GroupReduce(UnaryAlgorithm): """ @@ -857,15 +849,13 @@ class GroupReduce(UnaryAlgorithm): Source node """ - coordinates_source = NodeTrait(allow_none=True) + _repr_keys = ["source", "groupby", "reduce_fn"] + coordinates_source = NodeTrait(allow_none=True).tag(attr=True) # see https://github.com/pydata/xarray/blob/eeb109d9181c84dfb93356c5f14045d839ee64cb/xarray/core/accessors.py#L61 - groupby = tl.CaselessStrEnum(["dayofyear"]) # could add season, month, etc - - reduce_fn = tl.CaselessStrEnum( - ["all", "any", "count", "max", "mean", "median", "min", "prod", "std", "sum", "var", "custom"] - ) - custom_reduce_fn = tl.Any() + groupby = tl.CaselessStrEnum(["dayofyear", "weekofyear", "season", "month"], allow_none=True).tag(attr=True) + reduce_fn = tl.CaselessStrEnum(_REDUCE_FUNCTIONS).tag(attr=True) + custom_reduce_fn = tl.Any(allow_none=True, default_value=None).tag(attr=True) _source_coordinates = tl.Instance(Coordinates) @@ -873,33 +863,6 @@ class GroupReduce(UnaryAlgorithm): def _default_coordinates_source(self): return self.source - def _get_source_coordinates(self, requested_coordinates): - # get available time coordinates - # TODO do these two checks during node initialization - available_coordinates = self.coordinates_source.find_coordinates() - if len(available_coordinates) != 1: - raise ValueError("Cannot evaluate this node; too many available coordinates") - avail_coords = available_coordinates[0] - if "time" not in avail_coords.udims: - raise ValueError("GroupReduce coordinates source node must be time-dependent") - - # intersect grouped time coordinates using groupby DatetimeAccessor - avail_time = xr.DataArray(avail_coords.coords["time"]) - eval_time = xr.DataArray(requested_coordinates.coords["time"]) - N = getattr(avail_time.dt, self.groupby) - E = getattr(eval_time.dt, self.groupby) - native_time_mask = np.in1d(N, E) - - # use requested spatial coordinates and filtered available times - coords = Coordinates( - time=avail_time.data[native_time_mask], - lat=requested_coordinates["lat"], - lon=requested_coordinates["lon"], - order=("time", "lat", "lon"), - ) - - return coords - @common_doc(COMMON_DOC) @node_eval def eval(self, coordinates, output=None): @@ -922,12 +885,7 @@ def eval(self, coordinates, output=None): If source it not time-depended (required by this node). 
""" - self._source_coordinates = self._get_source_coordinates(coordinates) - - if output is None: - output = self.create_output_array(coordinates) - - source_output = self.source.eval(self._source_coordinates) + source_output = self.source.eval(coordinates) # group grouped = source_output.groupby("time.%s" % self.groupby) @@ -939,14 +897,25 @@ def eval(self, coordinates, output=None): # standard, e.g. grouped.median('time') out = getattr(grouped, self.reduce_fn)("time") - # map - eval_time = xr.DataArray(coordinates.coords["time"]) - E = getattr(eval_time.dt, self.groupby) - out = out.sel(**{self.groupby: E}).rename({self.groupby: "time"}) - output[:] = out.transpose(*output.dims).data + out = out.rename({self.groupby: "time"}) + if output is None: + coords = podpac.coordinates.merge_dims( + [coordinates.drop("time"), Coordinates([out.coords["time"]], ["time"])] + ) + coords = coords.transpose(*out.dims) + output = self.create_output_array(coords, data=out.data) + else: + output.data[:] = out.data[:] + + ## map + # eval_time = xr.DataArray(coordinates.coords["time"]) + # E = getattr(eval_time.dt, self.groupby) + # out = out.sel(**{self.groupby: E}).rename({self.groupby: "time"}) + # output[:] = out.transpose(*output.dims).data return output + @property def base_ref(self): """ Default node reference/name in node definitions @@ -959,6 +928,100 @@ def base_ref(self): return "%s.%s.%s" % (self.source.base_ref, self.groupby, self.reduce_fn) +class ResampleReduce(UnaryAlgorithm): + """ + Resample a time-dependent source node using a statistical operation to achieve the result. + + Attributes + ---------- + custom_reduce_fn : function + required if reduce_fn is 'custom'. + resampleby : str + datetime sub-accessor. Currently 'dayofyear' is the enabled option. + reduce_fn : str + builtin xarray groupby reduce function, or 'custom'. + source : podpac.Node + Source node + """ + + _repr_keys = ["source", "resampleby", "reduce_fn"] + coordinates_source = NodeTrait(allow_none=True).tag(attr=True) + + # see https://github.com/pydata/xarray/blob/eeb109d9181c84dfb93356c5f14045d839ee64cb/xarray/core/accessors.py#L61 + resample = tl.Unicode().tag(attr=True) # could add season, month, etc + reduce_fn = tl.CaselessStrEnum(_REDUCE_FUNCTIONS).tag(attr=True) + custom_reduce_fn = tl.Any(allow_none=True, default_value=None).tag(attr=True) + + _source_coordinates = tl.Instance(Coordinates) + + @tl.default("coordinates_source") + def _default_coordinates_source(self): + return self.source + + @common_doc(COMMON_DOC) + @node_eval + def eval(self, coordinates, output=None): + """Evaluates this nodes using the supplied coordinates. + + Parameters + ---------- + coordinates : podpac.Coordinates + {requested_coordinates} + output : podpac.UnitsDataArray, optional + {eval_output} + + Returns + ------- + {eval_return} + + Raises + ------ + ValueError + If source it not time-depended (required by this node). + """ + + source_output = self.source.eval(coordinates) + + # group + grouped = source_output.resample(time=self.resample) + + # reduce + if self.reduce_fn == "custom": + out = grouped.reduce(self.custom_reduce_fn) + else: + # standard, e.g. 
grouped.median('time') + out = getattr(grouped, self.reduce_fn)() + + if output is None: + coords = podpac.coordinates.merge_dims( + [coordinates.drop("time"), Coordinates([out.coords["time"]], ["time"])] + ) + coords = coords.transpose(*out.dims) + output = self.create_output_array(coords, data=out.data) + else: + output.data[:] = out.data[:] + + ## map + # eval_time = xr.DataArray(coordinates.coords["time"]) + # E = getattr(eval_time.dt, self.groupby) + # out = out.sel(**{self.groupby: E}).rename({self.groupby: "time"}) + # output[:] = out.transpose(*output.dims).data + + return output + + @property + def base_ref(self): + """ + Default node reference/name in node definitions + + Returns + ------- + str + Default node reference/name in node definitions + """ + return "%s.%s.%s" % (self.source.base_ref, self.resample, self.reduce_fn) + + class DayOfYear(GroupReduce): """ Group a time-dependent source node by day of year and compute a statistic for each group. @@ -974,3 +1037,145 @@ class DayOfYear(GroupReduce): """ groupby = "dayofyear" + + +class DayOfYearWindow(Algorithm): + """ + This applies a function over a moving window around day-of-year in the requested coordinates. + It includes the ability to rescale the input/outputs. Note if, the input coordinates include multiple years, the + moving window will include all of the data inside the day-of-year window. + + Users need to implement the 'function' method. + + Attributes + ----------- + source: podpac.Node + The source node from which the statistics will be computed + window: int, optional + Default is 0. The size of the window over which to compute the distrubtion. This is always centered about the + day-of-year. The total number of days is always an odd number. For example, window=2 and window=3 will compute + the beta distribution for [x-1, x, x + 1] and report it as the result for x, where x is a day of the year. + scale_max: podpac.Node, optional + Default is None. A source dataset that can be used to scale the maximum value of the source function so that it + will fall between [0, 1]. If None, uses self.scale_float[0]. + scale_min: podpac.Node, optional + Default is None. A source dataset that can be used to scale the minimum value of the source function so that it + will fall between [0, 1]. If None, uses self.scale_float[1]. + scale_float: list, optional + Default is []. Floating point numbers used to scale the max [0] and min [1] of the source so that it falls + between [0, 1]. If scale_max or scale_min are defined, this property is ignored. If these are defined, the data + will be rescaled only if rescale=True below. + If None and scale_max/scale_min are not defined, the data is not scaled in any way. 
+ rescale: bool, optional + Rescales the output data after being scaled from scale_float or scale_min/max + """ + + source = tl.Instance(podpac.Node).tag(attr=True) + window = tl.Int(0).tag(attr=True) + scale_max = tl.Instance(podpac.Node, default_value=None, allow_none=True).tag(attr=True) + scale_min = tl.Instance(podpac.Node, default_value=None, allow_none=True).tag(attr=True) + scale_float = tl.List(default_value=None, allow_none=True).tag(attr=True) + rescale = tl.Bool(False).tag(attr=True) + + def algorithm(self, inputs): + win = self.window // 2 + source = inputs["source"] + + # Scale the source to range [0, 1], required for the beta distribution + if "scale_max" in inputs: + scale_max = inputs["scale_max"] + elif self.scale_float and self.scale_float[1] is not None: + scale_max = self.scale_float[1] + else: + scale_max = None + + if "scale_min" in inputs: + scale_min = inputs["scale_min"] + elif self.scale_float and self.scale_float[0] is not None: + scale_min = self.scale_float[0] + else: + scale_min = None + + _log.debug("scale_min: {}\nscale_max: {}".format(scale_min, scale_max)) + if scale_min is not None and scale_max is not None: + source = (source.copy() - scale_min) / (scale_max - scale_min) + with np.errstate(invalid="ignore"): + source.data[(source.data < 0) | (source.data > 1)] = np.nan + + # Make the output coordinates with day-of-year as time + coords = xr.Dataset({"time": self._requested_coordinates["time"].coordinates}) + dsdoy = np.sort(np.unique(coords.time.dt.dayofyear)) + latlon_coords = self._requested_coordinates.drop("time") + time_coords = podpac.Coordinates([dsdoy], ["time"]) + coords = podpac.coordinates.merge_dims([latlon_coords, time_coords]) + coords = coords.transpose(*self._requested_coordinates.dims) + output = self.create_output_array(coords) + + # if all-nan input, no need to calculate + if np.all(np.isnan(source)): + return output + + # convert source time coords to day-of-year as well + sdoy = source.time.dt.dayofyear + + # loop over each day of year and compute window + for i, doy in enumerate(dsdoy): + _log.debug("Working on doy {doy} ({i}/{ld})".format(doy=doy, i=i + 1, ld=len(dsdoy))) + + # If either the start or end runs over the year, we need to do an OR on the bool index + # ----->s....<=e------ .in -out + # ..<=e----------->s.. 
+ do_or = False + + start = doy - win + if start < 1: + start += 365 + do_or = True + + end = doy + win + if end > 365: + end -= 365 + do_or = True + + if do_or: + I = (sdoy >= start) | (sdoy <= end) + else: + I = (sdoy >= start) & (sdoy <= end) + + # Scipy's beta function doesn's support multi-dimensional arrays, so we have to loop over lat/lon/alt + lat_f = lon_f = alt_f = [None] + dims = ["lat", "lon", "alt"] + if "lat" in source.dims: + lat_f = source["lat"].data + if "lon" in source.dims: + lon_f = source["lon"].data + if "alt" in source.dims: + alt_f = source["alt"].data + + for alt in alt_f: + for lat in lat_f: + for lon in lon_f: + # _log.debug(f'lat, lon, alt = {lat}, {lon}, {alt}) + loc_dict = {k: v for k, v in zip(dims, [lat, lon, alt]) if v is not None} + + data = source.sel(time=I, **loc_dict).dropna("time").data + if np.all(np.isnan(data)): + continue + + # Fit function to the particular point + output.loc[loc_dict][{"time": i}] = self.function(data, output.loc[loc_dict][{"time": i}]) + + # Rescale the outputs + if self.rescale: + output = self.rescale_outputs(output, scale_max, scale_min) + return output + + def function(self, data, output): + raise NotImplementedError( + "Child classes need to implement this function. It is applied over the data and needs" + " to populate the output." + ) + + def rescale_outputs(self, output, scale_max, scale_min): + output = (output * (scale_max - scale_min)) + scale_min + return output diff --git a/podpac/core/algorithm/test/test_algorithm.py b/podpac/core/algorithm/test/test_algorithm.py index 63ec0b093..6322e6184 100644 --- a/podpac/core/algorithm/test/test_algorithm.py +++ b/podpac/core/algorithm/test/test_algorithm.py @@ -9,6 +9,7 @@ import xarray as xr import podpac +from podpac.core.utils import NodeTrait from podpac.core.node import Node, NodeException from podpac.core.data.array_source import Array from podpac.core.algorithm.utility import Arange @@ -23,43 +24,21 @@ def test_eval_not_implemented(self): with pytest.raises(NotImplementedError): node.eval(c) - def test_base_definition(self): - class MyAlgorithm(BaseAlgorithm): - x = tl.Instance(Node) - y = tl.Instance(Node) - z = tl.Unicode().tag(attr=True) - - node = MyAlgorithm(x=Arange(), y=Arange(), z="abcd") - - d = node.base_definition - assert isinstance(d, OrderedDict) - assert "node" in d - assert "attrs" in d - - # base (node, params) - assert d["node"] == "MyAlgorithm" - assert d["attrs"]["z"] == "abcd" - - # inputs - assert "inputs" in d - assert isinstance(d["inputs"], dict) - assert set(d["inputs"].keys()) == set(["x", "y"]) - def test_find_coordinates(self): class MyAlgorithm(BaseAlgorithm): - x = tl.Instance(Node) - y = tl.Instance(Node) + x = NodeTrait().tag(attr=True) + y = NodeTrait().tag(attr=True) node = MyAlgorithm( - x=Array(native_coordinates=podpac.Coordinates([[0, 1, 2], [10, 20]], dims=["lat", "lon"])), - y=Array(native_coordinates=podpac.Coordinates([[0, 1, 2], [110, 120]], dims=["lat", "lon"])), + x=Array(coordinates=podpac.Coordinates([[0, 1, 2], [10, 20]], dims=["lat", "lon"])), + y=Array(coordinates=podpac.Coordinates([[0, 1, 2], [110, 120]], dims=["lat", "lon"])), ) l = node.find_coordinates() assert isinstance(l, list) assert len(l) == 2 - assert node.x.native_coordinates in l - assert node.y.native_coordinates in l + assert node.x.coordinates in l + assert node.y.coordinates in l class TestAlgorithm(object): @@ -74,7 +53,7 @@ def test_multi_threading(self): with podpac.settings: podpac.settings.set_unsafe_eval(True) - 
podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["DEFAULT_CACHE"] = [] podpac.settings["RAM_CACHE_ENABLED"] = False @@ -99,7 +78,7 @@ def test_multi_threading_cache_race(self): with podpac.settings: podpac.settings["MULTITHREADING"] = True podpac.settings["N_THREADS"] = 3 - podpac.settings["CACHE_OUTPUT_DEFAULT"] = True + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = True podpac.settings["DEFAULT_CACHE"] = ["ram"] podpac.settings["RAM_CACHE_ENABLED"] = True podpac.settings.set_unsafe_eval(True) @@ -134,7 +113,7 @@ def test_multi_threading_stress_nthreads(self): with podpac.settings: podpac.settings["MULTITHREADING"] = True podpac.settings["N_THREADS"] = 8 - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["DEFAULT_CACHE"] = [] podpac.settings["RAM_CACHE_ENABLED"] = False podpac.settings.set_unsafe_eval(True) @@ -147,7 +126,7 @@ def test_multi_threading_stress_nthreads(self): with podpac.settings: podpac.settings["MULTITHREADING"] = True podpac.settings["N_THREADS"] = 9 # 2 threads available after first 7 - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["DEFAULT_CACHE"] = [] podpac.settings["RAM_CACHE_ENABLED"] = False podpac.settings.set_unsafe_eval(True) @@ -217,8 +196,8 @@ def algorithm(self, inputs): def test_multiple_outputs(self): class MyAlgorithm(Algorithm): - x = tl.Instance(Node) - y = tl.Instance(Node) + x = NodeTrait().tag(attr=True) + y = NodeTrait().tag(attr=True) outputs = ["sum", "prod", "diff"] def algorithm(self, inputs): @@ -229,7 +208,7 @@ def algorithm(self, inputs): coords = podpac.Coordinates([[0, 1, 2], [10, 20]], dims=["lat", "lon"]) x = Arange() - y = Array(source=np.full(coords.shape, 2), native_coordinates=coords) + y = Array(source=np.full(coords.shape, 2), coordinates=coords) xout = np.arange(6).reshape(3, 2) # all outputs @@ -249,7 +228,7 @@ def algorithm(self, inputs): class TestUnaryAlgorithm(object): - source = Array(native_coordinates=podpac.Coordinates([[0, 1, 2], [10, 20]], dims=["lat", "lon"])) + source = Array(coordinates=podpac.Coordinates([[0, 1, 2], [10, 20]], dims=["lat", "lon"])) def test_outputs(self): node = UnaryAlgorithm(source=self.source) diff --git a/podpac/core/algorithm/test/test_coord_select.py b/podpac/core/algorithm/test/test_coord_select.py index 7e68637a5..3a0b5d6c8 100644 --- a/podpac/core/algorithm/test/test_coord_select.py +++ b/podpac/core/algorithm/test/test_coord_select.py @@ -17,15 +17,14 @@ class MyDataSource(DataSource): - def get_native_coordinates(self): - return podpac.Coordinates( - [ - podpac.crange("2010-01-01", "2018-01-01", "4,h"), - podpac.clinspace(-180, 180, 6), - podpac.clinspace(-80, -70, 6), - ], - dims=["time", "lat", "lon"], - ) + coordinates = podpac.Coordinates( + [ + podpac.crange("2010-01-01", "2018-01-01", "4,h"), + podpac.clinspace(-180, 180, 6), + podpac.clinspace(-80, -70, 6), + ], + dims=["time", "lat", "lon"], + ) def get_data(self, coordinates, slc): node = Arange() @@ -67,7 +66,7 @@ def test_time_expansion_implicit_coordinates(self): o = node.eval(coords) def test_spatial_expansion_ultiple_outputs(self): - multi = Array(source=np.random.random(coords.shape + (2,)), native_coordinates=coords, outputs=["a", "b"]) + multi = Array(source=np.random.random(coords.shape + (2,)), coordinates=coords, outputs=["a", "b"]) node = ExpandCoordinates(source=multi, lat=(-1, 1, 0.1)) o = node.eval(coords) 
@@ -93,7 +92,7 @@ def test_time_selection_implicit_coordinates(self): o = node.eval(coords) def test_spatial_selection_multiple_outputs(self): - multi = Array(source=np.random.random(coords.shape + (2,)), native_coordinates=coords, outputs=["a", "b"]) + multi = Array(source=np.random.random(coords.shape + (2,)), coordinates=coords, outputs=["a", "b"]) node = SelectCoordinates(source=multi, lat=(46, 56, 1)) o = node.eval(coords) @@ -114,7 +113,7 @@ def test_year_substitution_orig_coords(self): def test_year_substitution_missing_coords(self): source = Array( source=[[1, 2, 3], [4, 5, 6]], - native_coordinates=podpac.Coordinates( + coordinates=podpac.Coordinates( [podpac.crange("2018-01-01", "2018-01-02", "1,D"), podpac.clinspace(45, 66, 3)], dims=["time", "lat"] ), ) @@ -126,7 +125,7 @@ def test_year_substitution_missing_coords(self): def test_year_substitution_missing_coords_orig_coords(self): source = Array( source=[[1, 2, 3], [4, 5, 6]], - native_coordinates=podpac.Coordinates( + coordinates=podpac.Coordinates( [podpac.crange("2018-01-01", "2018-01-02", "1,D"), podpac.clinspace(45, 66, 3)], dims=["time", "lat"] ), ) @@ -136,7 +135,7 @@ def test_year_substitution_missing_coords_orig_coords(self): assert o["time"].data == xr.DataArray(coords.coords["time"]).data def test_year_substitution_multiple_outputs(self): - multi = Array(source=np.random.random(coords.shape + (2,)), native_coordinates=coords, outputs=["a", "b"]) + multi = Array(source=np.random.random(coords.shape + (2,)), coordinates=coords, outputs=["a", "b"]) node = YearSubstituteCoordinates(source=multi, year="2018") o = node.eval(coords) assert o.time.dt.year.data[0] == 2018 diff --git a/podpac/core/algorithm/test/test_generic.py b/podpac/core/algorithm/test/test_generic.py index c01e53cbf..e1ef7230e 100644 --- a/podpac/core/algorithm/test/test_generic.py +++ b/podpac/core/algorithm/test/test_generic.py @@ -1,5 +1,6 @@ from __future__ import division, unicode_literals, print_function, absolute_import +import sys import warnings import pytest @@ -10,26 +11,27 @@ from podpac.core.algorithm.utility import Arange, SinCoords from podpac.core.algorithm.generic import GenericInputs, Arithmetic, Generic, Mask, Combine +if sys.version_info.major == 2: + from podpac.core.algorithm.generic import PermissionError + class TestGenericInputs(object): def test_init(self): node = GenericInputs(a=Arange(), b=SinCoords()) - assert "a" in node.inputs - assert "b" in node.inputs + assert node.inputs["a"] == Arange() + assert node.inputs["b"] == SinCoords() + + def test_base_definition(self): + node = GenericInputs(a=Arange(), b=SinCoords()) + d = node._base_definition + assert "inputs" in d + assert "a" in d["inputs"] + assert "b" in d["inputs"] def test_reserved_name(self): with pytest.raises(RuntimeError, match="Trait .* is reserved"): GenericInputs(style=SinCoords()) - def test_serialization(self): - node = GenericInputs(a=Arange(), b=SinCoords()) - d = node.definition - assert d[node.base_ref]["inputs"]["a"] in d - assert d[node.base_ref]["inputs"]["b"] in d - - node2 = node.from_definition(d) - assert node2.hash == node.hash - class TestArithmetic(object): def test_init(self): diff --git a/podpac/core/algorithm/test/test_signal.py b/podpac/core/algorithm/test/test_signal.py index 0781b452b..383620672 100644 --- a/podpac/core/algorithm/test/test_signal.py +++ b/podpac/core/algorithm/test/test_signal.py @@ -67,7 +67,7 @@ def test_eval_multiple_outputs(self): lon = clinspace(-80, 70, 40, name="lon") kernel = [[1, 2, 1]] coords = Coordinates([lat, 
lon]) - multi = Array(source=np.random.random(coords.shape + (2,)), native_coordinates=coords, outputs=["a", "b"]) + multi = Array(source=np.random.random(coords.shape + (2,)), coordinates=coords, outputs=["a", "b"]) node = Convolution(source=multi, kernel=kernel) o = node.eval(Coordinates([lat, lon])) @@ -78,7 +78,7 @@ def test_eval_nan(self): data = np.ones(coords.shape) data[10, 10] = np.nan - source = Array(source=data, native_coordinates=coords) + source = Array(source=data, coordinates=coords) node = Convolution(source=source, kernel=[[1, 2, 1]]) o = node.eval(coords[8:12, 7:13]) diff --git a/podpac/core/algorithm/test/test_stats.py b/podpac/core/algorithm/test/test_stats.py index c87ceca2b..08c4867a7 100644 --- a/podpac/core/algorithm/test/test_stats.py +++ b/podpac/core/algorithm/test/test_stats.py @@ -4,13 +4,16 @@ import numpy as np import xarray as xr import scipy.stats +import traitlets as tl import podpac +from podpac.core.algorithm.utility import Arange from podpac.core.data.array_source import Array from podpac.core.algorithm.stats import Reduce from podpac.core.algorithm.stats import Min, Max, Sum, Count, Mean, Variance, Skew, Kurtosis, StandardDeviation +from podpac.core.algorithm.generic import Arithmetic from podpac.core.algorithm.stats import Median, Percentile -from podpac.core.algorithm.stats import GroupReduce, DayOfYear +from podpac.core.algorithm.stats import GroupReduce, DayOfYear, DayOfYearWindow def setup_module(): @@ -24,11 +27,11 @@ def setup_module(): a[3, 0, 0] = np.nan a[0, 3, 0] = np.nan a[0, 0, 3] = np.nan - source = Array(source=a, native_coordinates=coords) + source = Array(source=a, coordinates=coords) data = source.eval(coords) ab = np.stack([a, 2 * a], -1) - multisource = Array(source=ab, native_coordinates=coords, outputs=["a", "b"]) + multisource = Array(source=ab, coordinates=coords, outputs=["a", "b"]) bdata = 2 * data @@ -40,13 +43,13 @@ def test_auto_chunk(self): node = Min(source=source) with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = "auto" node.eval(coords) def test_chunked_fallback(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False class First(Reduce): def reduce(self, x): @@ -72,7 +75,7 @@ class BaseTests(object): def test_full(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = None node = self.NodeClass(source=source) @@ -88,7 +91,7 @@ def test_full(self): def test_full_chunked(self): with podpac.settings: node = self.NodeClass(source=source, dims=coords.dims) - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = 500 output = node.eval(coords) # xr.testing.assert_allclose(output, self.expected_full) @@ -96,7 +99,7 @@ def test_full_chunked(self): def test_lat_lon(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = None node = self.NodeClass(source=source, dims=["lat", "lon"]) output = node.eval(coords) @@ -105,7 +108,7 @@ def test_lat_lon(self): def test_lat_lon_chunked(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = 500 node = 
self.NodeClass(source=source, dims=["lat", "lon"]) output = node.eval(coords) @@ -114,7 +117,7 @@ def test_lat_lon_chunked(self): def test_time(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = None node = self.NodeClass(source=source, dims="time") output = node.eval(coords) @@ -123,7 +126,7 @@ def test_time(self): def test_time_chunked(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = 500 node = self.NodeClass(source=source, dims="time") output = node.eval(coords) @@ -132,7 +135,7 @@ def test_time_chunked(self): def test_multiple_outputs(self): with podpac.settings: - podpac.settings["CACHE_OUTPUT_DEFAULT"] = False + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False podpac.settings["CHUNK_SIZE"] = None node = self.NodeClass(source=multisource, dims=["lat", "lon"]) output = node.eval(coords) @@ -265,5 +268,102 @@ class TestGroupReduce(object): pass +class TestResampleReduce(object): + pass + + class TestDayOfYear(object): pass + + +class F(DayOfYearWindow): + cache_output = tl.Bool(False) + force_eval = tl.Bool(True) + + def function(self, data, output): + return len(data) + + +class FM(DayOfYearWindow): + cache_output = tl.Bool(False) + force_eval = tl.Bool(True) + + def function(self, data, output): + return np.mean(data) + + +class TestDayOfYearWindow(object): + def test_doy_window1(self): + coords = podpac.coordinates.concat( + [ + podpac.Coordinates([podpac.crange("1999-12-29", "2000-01-02", "1,D", "time")]), + podpac.Coordinates([podpac.crange("2001-12-30", "2002-01-03", "1,D", "time")]), + ] + ) + + node = Arange() + nodedoywindow = F(source=node, window=1, cache_output=False, force_eval=True) + o = nodedoywindow.eval(coords) + + np.testing.assert_array_equal(o, [2, 2, 1, 1, 2, 2]) + + def test_doy_window2(self): + coords = podpac.coordinates.concat( + [ + podpac.Coordinates([podpac.crange("1999-12-29", "2000-01-03", "1,D", "time")]), + podpac.Coordinates([podpac.crange("2001-12-30", "2002-01-02", "1,D", "time")]), + ] + ) + + node = Arange() + nodedoywindow = F(source=node, window=2, cache_output=False, force_eval=True) + o = nodedoywindow.eval(coords) + + np.testing.assert_array_equal(o, [6, 5, 3, 3, 5, 6]) + + def test_doy_window2_mean_rescale_float(self): + coords = podpac.coordinates.concat( + [ + podpac.Coordinates([podpac.crange("1999-12-29", "2000-01-03", "1,D", "time")]), + podpac.Coordinates([podpac.crange("2001-12-30", "2002-01-02", "1,D", "time")]), + ] + ) + + node = Arange() + nodedoywindow = FM(source=node, window=2, cache_output=False, force_eval=True) + o = nodedoywindow.eval(coords) + + nodedoywindow_s = FM( + source=node, window=2, cache_output=False, force_eval=True, scale_float=[0, coords.size], rescale=True + ) + o_s = nodedoywindow_s.eval(coords) + + np.testing.assert_array_almost_equal(o, o_s) + + def test_doy_window2_mean_rescale_max_min(self): + with podpac.settings: + podpac.settings.set_unsafe_eval(True) + + coords = podpac.coordinates.concat( + [ + podpac.Coordinates([podpac.crange("1999-12-29", "2000-01-03", "1,D", "time")]), + podpac.Coordinates([podpac.crange("2001-12-30", "2002-01-02", "1,D", "time")]), + ] + ) + + node = Arange() + node_max = Arithmetic(source=node, eqn="(source < 5) + source") + node_min = Arithmetic(source=node, eqn="-1*(source < 5) + source") + + nodedoywindow_s = FM( + source=node, + window=2, + 
cache_output=False, + force_eval=True, + scale_max=node_max, + scale_min=node_min, + rescale=False, + ) + o_s = nodedoywindow_s.eval(coords) + + np.testing.assert_array_almost_equal([0.5] * o_s.size, o_s) diff --git a/podpac/core/algorithm/utility.py b/podpac/core/algorithm/utility.py index 82d1eb4c5..e9c7cab19 100644 --- a/podpac/core/algorithm/utility.py +++ b/podpac/core/algorithm/utility.py @@ -63,7 +63,7 @@ def algorithm(self, inputs): raise ValueError("Coordinate name not in evaluated coordinates") c = self._requested_coordinates[self.coord_name] - coords = Coordinates([c]) + coords = Coordinates([c], validate_crs=False) return self.create_output_array(coords, data=c.coordinates) diff --git a/podpac/core/authentication.py b/podpac/core/authentication.py index 8d38d7f4e..a9dd9ec28 100644 --- a/podpac/core/authentication.py +++ b/podpac/core/authentication.py @@ -1,171 +1,194 @@ """ -PODPAC Authentication +PODPAC Authentication """ -from __future__ import division, unicode_literals, print_function, absolute_import - - -import sys import getpass -import re - -# python 2/3 compatibility -if sys.version_info.major < 3: - input = raw_input -else: - from builtins import input - -# Optional PODPAC dependency -try: - import requests -except: - - class Dum(object): - def __init__(self, *args, **kwargs): - pass +import logging - requests = Dum() - requests.Session = Dum +import requests +import traitlets as tl +from lazy_import import lazy_module -from podpac.core import utils from podpac.core.settings import settings +from podpac.core.utils import cached_property +_log = logging.getLogger(__name__) -class Session(requests.Session): - """Base Class for authentication in PODPAC - - Attributes - ---------- - auth : tuple - (username, password) string in plain text - hostname : str - Host address (eg. http://example.com) that gets authenticated. - password : str - Password used for authentication. - Loaded from podpac settings file using password@:attr:`hostname` as the key. - username : str - Username used for authentication. - Loaded from podpac settings file using username@:attr:`hostname` as the key. - """ - - def __init__(self, hostname="", username=None, password=None): - # requests __init__ - super(Session, self).__init__() - - self.hostname = hostname - self.username = username - self.password = password - - # load username/password from settings - if self.username is None: - self.username = settings["username@" + self.hostname] - - if self.password is None: - self.password = settings["password@" + self.hostname] - - self.auth = (self.username, self.password) - - -class EarthDataSession(Session): - """ - Modified from: https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python - overriding requests.Session.rebuild_auth to maintain headers when redirected +def set_credentials(hostname, username=None, password=None): + """Set authentication credentials for a remote URL in the :class:`podpac.settings`. - Attributes + Parameters ---------- - product_url : str - Url to NSIDC product OpenDAP server - product_url_regex : str - Regex used to match redirected hostname if different from :attr:`self.hostname` + hostname : str + Hostname for `username` and `password`. + username : str, optional + Username to store in settings for `hostname`. + If no username is provided and the username does not already exist in the settings, + the user will be prompted to enter one. 
+ password : str, optional + Password to store in settings for `hostname` + If no password is provided and the password does not already exist in the settings, + the user will be prompted to enter one. """ - # make sure attributes are persistent across all EarthDataSession classes - hostname = None - username = None - password = None - auth = tuple() + if hostname is None or hostname == "": + raise ValueError("`hostname` must be defined") - def __init__(self, product_url="", **kwargs): + # see whats stored in settings already + u_settings = settings.get("username@{}".format(hostname)) + p_settings = settings.get("password@{}".format(hostname)) - # override hostname with earthdata url - kwargs["hostname"] = "urs.earthdata.nasa.gov" + # get username from 1. function input 2. settings 3. python input() + u = username or u_settings or input("Username: ") + p = password or p_settings or getpass.getpass() - # Session init - super(EarthDataSession, self).__init__(**kwargs) + # set values in settings + settings["username@{}".format(hostname)] = u + settings["password@{}".format(hostname)] = p - # store product_url - self.product_url = product_url + _log.debug("Set credentials for hostname {}".format(hostname)) - # parse product_url for hostname - product_url_hostname = requests.utils.urlparse(self.product_url).hostname - # make all numbers in product_url_hostname wildcards - self.product_url_regex = ( - re.compile(re.sub(r"\d", r"\\d", product_url_hostname)) if product_url_hostname is not None else None - ) +class RequestsSessionMixin(tl.HasTraits): + hostname = tl.Unicode(allow_none=False) + auth_required = tl.Bool(default_value=False) - def rebuild_auth(self, prepared_request, response): + @property + def username(self): + """Returns username stored in settings for accessing `self.hostname`. + The username is stored under key `username@` + + Returns + ------- + str + username stored in settings for accessing `self.hostname` + + Raises + ------ + ValueError + Raises a ValueError if not username is stored in settings for `self.hostname` """ - Overrides from the library to keep headers when redirected to or from - the NASA auth host. + key = "username@{}".format(self.hostname) + username = settings.get(key) + if not username: + raise ValueError( + "No username found for hostname '{0}'. Use `{1}.set_credentials(username='', password='') to store credentials for this host".format( + self.hostname, self.__class__.__name__ + ) + ) + + return username + + @property + def password(self): + """Returns password stored in settings for accessing `self.hostname`. + The password is stored under key `password@` - Parameters - ---------- - prepared_request : requests.Request - Description - response : requests.Response - Description + Returns + ------- + str + password stored in settings for accessing `self.hostname` + + Raises + ------ + ValueError + Raises a ValueError if not password is stored in settings for `self.hostname` + """ + key = "password@{}".format(self.hostname) + password = settings.get(key) + if not password: + raise ValueError( + "No password found for hostname {0}. 
Use `{1}.set_credentials(username='', password='') to store credentials for this host".format( + self.hostname, self.__class__.__name__ + ) + ) + + return password + + @cached_property + def session(self): + """Requests Session object for making calls to remote `self.hostname` + See https://2.python-requests.org/en/master/api/#sessionapi Returns ------- - None - + :class:requests.Session + Requests Session class with `auth` attribute defined """ - headers = prepared_request.headers - url = prepared_request.url - - if "Authorization" in headers: - original_parsed = requests.utils.urlparse(response.request.url) - redirect_parsed = requests.utils.urlparse(url) - - # delete Authorization headers if original and redirect do not match - # is not in product_url_regex - if ( - (original_parsed.hostname != redirect_parsed.hostname) - and redirect_parsed.hostname != self.hostname - and original_parsed.hostname != self.hostname - ): - - # if redirect matches product_url_regex, then allow the headers to stay - if self.product_url_regex is not None and self.product_url_regex.match(redirect_parsed.hostname): - pass - else: - del headers["Authorization"] - - return - - def update_login(self, username=None, password=None): - """Summary + return self._create_session() + + def set_credentials(self, username=None, password=None): + """Shortcut to :func:`podpac.authentication.set_crendentials` using class member :attr:`self.hostname` for the hostname Parameters ---------- username : str, optional - Username input + Username to store in settings for `self.hostname`. + If no username is provided and the username does not already exist in the settings, + the user will be prompted to enter one. password : str, optional - Password input + Password to store in settings for `self.hostname` + If no password is provided and the password does not already exist in the settings, + the user will be prompted to enter one. """ - print("Updating login information for: ", self.hostname) - - if username is None: - username = input("Username: ") - - settings["username@" + self.hostname] = username - - if password is None: - password = getpass.getpass() - - settings["password@" + self.hostname] = password + return set_credentials(self.hostname, username=username, password=password) - self.auth = (username, password) + def _create_session(self): + """Creates a :class:`requests.Session` with username and password defined + + Returns + ------- + :class:`requests.Session` + """ + s = requests.Session() + + try: + s.auth = (self.username, self.password) + except ValueError as e: + if self.auth_required: + raise e + else: + _log.warning("No auth provided for session") + + return s + + +class S3Mixin(tl.HasTraits): + """ Mixin to add S3 credentials and access to a Node. 
""" + + anon = tl.Bool(False) + aws_access_key_id = tl.Unicode(allow_none=True) + aws_secret_access_key = tl.Unicode(allow_none=True) + aws_region_name = tl.Unicode(allow_none=True) + aws_client_kwargs = tl.Dict() + config_kwargs = tl.Dict() + + @tl.default("aws_access_key_id") + def _get_access_key_id(self): + return settings["AWS_ACCESS_KEY_ID"] + + @tl.default("aws_secret_access_key") + def _get_secret_access_key(self): + return settings["AWS_SECRET_ACCESS_KEY"] + + @tl.default("aws_region_name") + def _get_region_name(self): + return settings["AWS_REGION_NAME"] + + @cached_property + def s3(self): + # this has to be done here for multithreading to work + s3fs = lazy_module("s3fs") + + if self.anon: + return s3fs.S3FileSystem(anon=True, client_kwargs=self.aws_client_kwargs) + else: + return s3fs.S3FileSystem( + key=self.aws_access_key_id, + secret=self.aws_secret_access_key, + region_name=self.aws_region_name, + client_kwargs=self.aws_client_kwargs, + config_kwargs=self.config_kwargs, + ) diff --git a/podpac/core/cache/cache_ctrl.py b/podpac/core/cache/cache_ctrl.py index 11f872154..719dcfc91 100644 --- a/podpac/core/cache/cache_ctrl.py +++ b/podpac/core/cache/cache_ctrl.py @@ -4,13 +4,19 @@ import podpac from podpac.core.settings import settings - from podpac.core.cache.utils import CacheWildCard, CacheException from podpac.core.cache.ram_cache_store import RamCacheStore from podpac.core.cache.disk_cache_store import DiskCacheStore from podpac.core.cache.s3_cache_store import S3CacheStore +_CACHE_STORES = {"ram": RamCacheStore, "disk": DiskCacheStore, "s3": S3CacheStore} + +_CACHE_NAMES = {RamCacheStore: "ram", DiskCacheStore: "disk", S3CacheStore: "s3"} + +_CACHE_MODES = ["ram", "disk", "network", "all"] + + def get_default_cache_ctrl(): """ Get the default CacheCtrl according to the settings. @@ -27,46 +33,39 @@ def get_default_cache_ctrl(): return make_cache_ctrl(settings["DEFAULT_CACHE"]) -def make_cache_ctrl(stores): +def make_cache_ctrl(names): """ Make a cache_ctrl from a list of cache store types. Arguments --------- - stores : str or list - cache store or stores, e.g. 'ram' or ['ram', 'disk']. + names : str or list + cache name or names, e.g. 'ram' or ['ram', 'disk']. Returns ------- ctrl : CacheCtrl - CachCtrl using the specified cache stores + CachCtrl using the specified cache names """ - if isinstance(stores, str): - stores = [stores] + if isinstance(names, six.string_types): + names = [names] - cache_stores = [] - for elem in stores: - if elem == "ram": - cache_stores.append(RamCacheStore()) - elif elem == "disk": - cache_stores.append(DiskCacheStore()) - elif elem == "s3": - cache_stores.append(S3CacheStore()) - else: - raise ValueError("Unknown cache store type '%s'" % elem) + for name in names: + if name not in _CACHE_STORES: + raise ValueError("Unknown cache store type '%s', options are %s" % (name, list(_CACHE_STORES))) - return CacheCtrl(cache_stores) + return CacheCtrl([_CACHE_STORES[name]() for name in names]) -def clear_cache(mode=None): +def clear_cache(mode="all"): """ Clear the entire default cache_ctrl. Arguments --------- mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. 
""" cache_ctrl = get_default_cache_ctrl() @@ -88,20 +87,22 @@ def __init__(self, cache_stores=[]): Parameters ---------- cache_stores : list, optional - list of CacheStore objects to manage, in the order that they should be interogated. + list of CacheStore objects to manage, in the order that they should be interrogated. """ + self._cache_stores = cache_stores - self._cache_mode = None - def _get_cache_stores(self, mode): - if mode is None: - mode = self._cache_mode - if mode is None: - mode = "all" + def __repr__(self): + return "CacheCtrl(cache_stores=%s)" % self.cache_stores + + @property + def cache_stores(self): + return [_CACHE_NAMES[store.__class__] for store in self._cache_stores] + def _get_cache_stores_by_mode(self, mode="all"): return [c for c in self._cache_stores if mode in c.cache_modes] - def put(self, node, data, key, coordinates=None, mode=None, update=False): + def put(self, node, data, key, coordinates=None, mode="all", update=True): """Cache data for specified node. Parameters @@ -112,33 +113,33 @@ def put(self, node, data, key, coordinates=None, mode=None, update=False): Data to cache key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. update : bool If True existing data in cache will be updated with `data`, If False, error will be thrown if attempting put something into the cache with the same node, key, coordinates of an existing entry. """ if not isinstance(node, podpac.Node): - raise TypeError("node must of type 'Node', not '%s'" % type(Node)) + raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node)) if not isinstance(key, six.string_types): - raise TypeError("key must be a string type, not '%s'" % (type(key))) + raise TypeError("Invalid key (must be a string, not '%s')" % (type(key))) if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None: - raise TypeError("coordinates must be of type 'Coordinates', not '%s'" % type(coordinates)) + raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates)) - if not isinstance(mode, six.string_types) and mode is not None: - raise TypeError("mode must be of type 'str', not '%s'" % type(mode)) + if mode not in _CACHE_MODES: + raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode)) if key == "*": - raise ValueError("key cannot be '*'") + raise ValueError("Invalid key ('*' is reserved)") - for c in self._get_cache_stores(mode): + for c in self._get_cache_stores_by_mode(mode): c.put(node=node, data=data, key=key, coordinates=coordinates, update=update) - def get(self, node, key, coordinates=None, mode=None): + def get(self, node, key, coordinates=None, mode="all"): """Get cached data for this node. Parameters @@ -147,10 +148,10 @@ def get(self, node, key, coordinates=None, mode=None): node requesting storage. key : str Cached object key, e.g. 'output'. 
- coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. Returns ------- @@ -164,26 +165,26 @@ def get(self, node, key, coordinates=None, mode=None): """ if not isinstance(node, podpac.Node): - raise TypeError("node must of type 'Node', not '%s'" % type(Node)) + raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node)) if not isinstance(key, six.string_types): - raise TypeError("key must be a string type, not '%s'" % (type(key))) + raise TypeError("Invalid key (must be a string, not '%s')" % (type(key))) if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None: - raise TypeError("coordinates must be of type 'Coordinates', not '%s'" % type(coordinates)) + raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates)) - if not isinstance(mode, six.string_types) and mode is not None: - raise TypeError("mode must be of type 'str', not '%s'" % type(mode)) + if mode not in _CACHE_MODES: + raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode)) if key == "*": - raise ValueError("key cannot be '*'") + raise ValueError("Invalid key ('*' is reserved)") - for c in self._get_cache_stores(mode): + for c in self._get_cache_stores_by_mode(mode): if c.has(node=node, key=key, coordinates=coordinates): return c.get(node=node, key=key, coordinates=coordinates) raise CacheException("Requested data is not in any cache stores.") - def has(self, node, key, coordinates=None, mode=None): + def has(self, node, key, coordinates=None, mode="all"): """Check for cached data for this node Parameters @@ -195,7 +196,7 @@ def has(self, node, key, coordinates=None, mode=None): coordinates: Coordinate, optional Coordinates for which cached object should be checked mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. 
Returns ------- @@ -204,27 +205,27 @@ def has(self, node, key, coordinates=None, mode=None): """ if not isinstance(node, podpac.Node): - raise TypeError("node must of type 'Node', not '%s'" % type(Node)) + raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node)) if not isinstance(key, six.string_types): - raise TypeError("key must be a string type, not '%s'" % (type(key))) + raise TypeError("Invalid key (must be a string, not '%s')" % (type(key))) if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None: - raise TypeError("coordinates must be of type 'Coordinates', not '%s'" % type(coordinates)) + raise TypeError("Invalid coordinates (must be of type 'Coordinates', not '%s')" % type(coordinates)) - if not isinstance(mode, six.string_types) and mode is not None: - raise TypeError("mode must be of type 'str', not '%s'" % type(mode)) + if mode not in _CACHE_MODES: + raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode)) if key == "*": - raise ValueError("key cannot be '*'") + raise ValueError("Invalid key ('*' is reserved)") - for c in self._get_cache_stores(mode): + for c in self._get_cache_stores_by_mode(mode): if c.has(node=node, key=key, coordinates=coordinates): return True return False - def rem(self, node, key, coordinates=None, mode=None): + def rem(self, node, key, coordinates=None, mode="all"): """Delete cached data for this node. Parameters @@ -233,23 +234,23 @@ def rem(self, node, key, coordinates=None, mode=None): node requesting storage. key : str Delete only cached objects with this key. Use `'*'` to match all keys. - coordinates : Coordinates, str + coordinates : :class:`podpac.Coordinates`, str Delete only cached objects for these coordinates. Use `'*'` to match all coordinates. mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. """ if not isinstance(node, podpac.Node): - raise TypeError("node must of type 'Node', not '%s'" % type(podpac.Node)) + raise TypeError("Invalid node (must be of type Node, not '%s')" % type(node)) if not isinstance(key, six.string_types): - raise TypeError("key must be a string type, not '%s'" % (type(key))) + raise TypeError("Invalid key (must be a string, not '%s')" % (type(key))) if not isinstance(coordinates, podpac.Coordinates) and coordinates is not None and coordinates != "*": - raise TypeError("coordinates must be '*' or of type 'Coordinates' not '%s'" % type(coordinates)) + raise TypeError("Invalid coordinates (must be '*' or of type 'Coordinates', not '%s')" % type(coordinates)) - if not isinstance(mode, six.string_types) and mode is not None: - raise TypeError("mode must be of type 'str', not '%s'" % type(mode)) + if mode not in _CACHE_MODES: + raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode)) if key == "*": key = CacheWildCard() @@ -257,21 +258,21 @@ def rem(self, node, key, coordinates=None, mode=None): if coordinates == "*": coordinates = CacheWildCard() - for c in self._get_cache_stores(mode): + for c in self._get_cache_stores_by_mode(mode): c.rem(node=node, key=key, coordinates=coordinates) - def clear(self, mode=None): + def clear(self, mode="all"): """ Clear all cached data. 
Parameters ------------ mode : str - determines what types of the `CacheStore` are affected: 'ram','disk','network','all'. Defaults to `node._cache_mode` or 'all'. Overriden by `self._cache_mode` if `self._cache_mode` is not `None`. + determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'. """ - if not isinstance(mode, six.string_types) and mode is not None: - raise TypeError("mode must be of type 'str', not '%s'" % type(mode)) + if mode not in _CACHE_MODES: + raise ValueError("Invalid mode (must be one of %s, not '%s')" % (_CACHE_MODES, mode)) - for c in self._get_cache_stores(mode): + for c in self._get_cache_stores_by_mode(mode): c.clear() diff --git a/podpac/core/cache/cache_store.py b/podpac/core/cache/cache_store.py index 8d23fe752..2f128540d 100644 --- a/podpac/core/cache/cache_store.py +++ b/podpac/core/cache/cache_store.py @@ -29,7 +29,7 @@ def size(self): raise NotImplementedError - def put(self, node, data, key, coordinates=None, update=False): + def put(self, node, data, key, coordinates=None, update=True): """Cache data for specified node. Parameters @@ -40,7 +40,7 @@ def put(self, node, data, key, coordinates=None, update=False): Data to cache key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output update : bool If True existing data in cache will be updated with `data`, If False, error will be thrown if attempting put something into the cache with the same node, key, coordinates of an existing entry. @@ -56,7 +56,7 @@ def get(self, node, key, coordinates=None): node requesting storage. key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output Returns @@ -80,7 +80,7 @@ def rem(self, node=None, key=None, coordinates=None): node requesting storage. key : str, optional Delete only cached objects with this key. - coordinates : Coordinates + coordinates : :class:`podpac.Coordinates` Delete only cached objects for these coordinates. 
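Editor's note: a short usage sketch of the refactored cache API, mirroring the tests further down in this diff. Note the new defaults: `put` now uses `update=True` and silently overwrites, and `mode` defaults to `'all'`. This assumes disk caching is enabled in settings; `Arange` is just a convenient throwaway node, as in the tests.

```python
# Usage sketch for the refactored CacheCtrl API (mirrors the tests below).
import podpac
from podpac.core.cache.cache_ctrl import make_cache_ctrl

ctrl = make_cache_ctrl(["ram", "disk"])        # store names, not store instances
node = podpac.algorithm.Arange()

ctrl.put(node, 10, "output")                   # update=True by default
ctrl.put(node, 20, "output")                   # overwrites silently; pass update=False to raise instead
assert ctrl.get(node, "output") == 20
assert ctrl.has(node, "output", mode="ram")    # restrict the check to the RAM store

ctrl.rem(node, "*")                            # '*' wildcard removes all keys for this node
ctrl.clear(mode="all")                         # 'ram', 'disk', 'network', or 'all'
```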
""" raise NotImplementedError diff --git a/podpac/core/cache/disk_cache_store.py b/podpac/core/cache/disk_cache_store.py index 78bce95da..fce0e7ecb 100644 --- a/podpac/core/cache/disk_cache_store.py +++ b/podpac/core/cache/disk_cache_store.py @@ -23,10 +23,7 @@ def __init__(self): if not settings["DISK_CACHE_ENABLED"]: raise CacheException("Disk cache is disabled in the podpac settings.") - if os.path.isabs(settings["DISK_CACHE_DIR"]): - self._root_dir_path = settings["DISK_CACHE_DIR"] - else: - self._root_dir_path = os.path.join(settings["ROOT_PATH"], settings["DISK_CACHE_DIR"]) + self._root_dir_path = settings.cache_path # ----------------------------------------------------------------------------------------------------------------- # public cache API diff --git a/podpac/core/cache/file_cache_store.py b/podpac/core/cache/file_cache_store.py index 4eb55234c..1863bad75 100644 --- a/podpac/core/cache/file_cache_store.py +++ b/podpac/core/cache/file_cache_store.py @@ -36,7 +36,7 @@ class FileCacheStore(CacheStore): # public cache API methods # ----------------------------------------------------------------------------------------------------------------- - def put(self, node, data, key, coordinates=None, update=False): + def put(self, node, data, key, coordinates=None, update=True): """Cache data for specified node. Parameters @@ -47,17 +47,17 @@ def put(self, node, data, key, coordinates=None, update=False): Data to cache key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output update : bool If True existing data in cache will be updated with `data`, If False, error will be thrown if attempting put something into the cache with the same node, key, coordinates of an existing entry. """ # check for existing entry - if self.has(node, key, coordinates): - if not update: - raise CacheException("Cache entry already exists. Use `update=True` to overwrite.") - self.rem(node, key, coordinates) + if not update and self.has(node, key, coordinates): + raise CacheException("Cache entry already exists. Use `update=True` to overwrite.") + + self.rem(node, key, coordinates) # serialize path_root = self._path_join(self._get_node_dir(node), self._get_filename(node, key, coordinates)) @@ -88,7 +88,7 @@ def put(self, node, data, key, coordinates=None, update=False): else: warnings.warn( "Object of type '%s' is not json serializable; caching object to file using pickle, which " - "may not be compatible with other Python versions or podpac versions." + "may not be compatible with other Python versions or podpac versions." % type(data) ) path = path_root + ".pkl" s = pickle.dumps(data) @@ -120,7 +120,7 @@ def get(self, node, key, coordinates=None): node requesting storage. key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output Returns @@ -195,7 +195,7 @@ def rem(self, node, key=CacheWildCard(), coordinates=CacheWildCard()): node requesting storage key : str, CacheWildCard, optional Delete cached objects with this key, or any key if `key` is a CacheWildCard. 
- coordinates : Coordinates, CacheWildCard, None, optional + coordinates : :class:`podpac.Coordinates`, CacheWildCard, None, optional Delete only cached objects for these coordinates, or any coordinates if `coordinates` is a CacheWildCard. `None` specifically indicates entries that do not have coordinates. """ diff --git a/podpac/core/cache/ram_cache_store.py b/podpac/core/cache/ram_cache_store.py index 9c884f954..071a6d8c7 100644 --- a/podpac/core/cache/ram_cache_store.py +++ b/podpac/core/cache/ram_cache_store.py @@ -55,7 +55,7 @@ def size(self): process = psutil.Process(os.getpid()) return process.memory_info().rss # this is actually the total size of the process - def put(self, node, data, key, coordinates=None, update=False): + def put(self, node, data, key, coordinates=None, update=True): """Cache data for specified node. Parameters @@ -66,7 +66,7 @@ def put(self, node, data, key, coordinates=None, update=False): Data to cache key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output update : bool If True existing data in cache will be updated with `data`, If False, error will be thrown if attempting put something into the cache with the same node, key, coordinates of an existing entry. @@ -77,9 +77,10 @@ def put(self, node, data, key, coordinates=None, update=False): full_key = self._get_full_key(node, key, coordinates) - if full_key in _thread_local.cache: - if not update: - raise CacheException("Cache entry already exists. Use update=True to overwrite.") + if not update and full_key in _thread_local.cache: + raise CacheException("Cache entry already exists. Use update=True to overwrite.") + + self.rem(node, key, coordinates) if self.max_size is not None and self.size >= self.max_size: # # TODO removal policy @@ -101,7 +102,7 @@ def get(self, node, key, coordinates=None): node requesting storage. key : str Cached object key, e.g. 'output'. - coordinates : Coordinates, optional + coordinates : :class:`podpac.Coordinates`, optional Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output Returns @@ -158,7 +159,7 @@ def rem(self, node, key=CacheWildCard(), coordinates=CacheWildCard()): node requesting storage. key : str, optional Delete only cached objects with this key. - coordinates : Coordinates + coordinates : :class:`podpac.Coordinates` Delete only cached objects for these coordinates. 
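Editor's note: with the `DiskCacheStore` change above, the cache directory now comes from `settings.cache_path`. A small sketch of pointing the disk cache at a specific absolute directory, along the lines of the updated `DiskCacheStore` tests later in this diff; the key and value are placeholders.

```python
# Sketch: direct the disk cache to a temporary directory (mirrors the tests below).
import tempfile
import podpac
from podpac.core.cache.disk_cache_store import DiskCacheStore

with podpac.settings:
    podpac.settings["DISK_CACHE_ENABLED"] = True
    podpac.settings["DISK_CACHE_DIR"] = tempfile.mkdtemp(prefix="podpac-cache-")  # absolute path

    store = DiskCacheStore()
    node = podpac.algorithm.Arange()
    store.put(node, 10, "mykey")
    assert store.get(node, "mykey") == 10
    store.clear()
```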
""" diff --git a/podpac/core/cache/test/test_cache_ctrl.py b/podpac/core/cache/test/test_cache_ctrl.py index 17f8abe89..048fb1cd6 100644 --- a/podpac/core/cache/test/test_cache_ctrl.py +++ b/podpac/core/cache/test/test_cache_ctrl.py @@ -9,18 +9,245 @@ from podpac.core.cache.cache_ctrl import get_default_cache_ctrl, make_cache_ctrl, clear_cache +class CacheCtrlTestNode(podpac.Node): + pass + + +NODE = CacheCtrlTestNode() + + class TestCacheCtrl(object): - def test_init(self): + def test_init_default(self): + ctrl = CacheCtrl() + assert len(ctrl._cache_stores) == 0 + assert ctrl.cache_stores == [] + repr(ctrl) + + def test_init_list(self): + ctrl = CacheCtrl(cache_stores=[]) + assert len(ctrl._cache_stores) == 0 + assert ctrl.cache_stores == [] + repr(ctrl) + + ctrl = CacheCtrl(cache_stores=[RamCacheStore()]) + assert len(ctrl._cache_stores) == 1 + assert isinstance(ctrl._cache_stores[0], RamCacheStore) + assert ctrl.cache_stores == ["ram"] + repr(ctrl) + + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + assert len(ctrl._cache_stores) == 2 + assert isinstance(ctrl._cache_stores[0], RamCacheStore) + assert isinstance(ctrl._cache_stores[1], DiskCacheStore) + assert ctrl.cache_stores == ["ram", "disk"] + repr(ctrl) + + def test_put_has_get(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + ctrl.clear() + + # has False + assert not ctrl._cache_stores[0].has(NODE, "key") + assert not ctrl._cache_stores[1].has(NODE, "key") + assert not ctrl.has(NODE, "key") + + # put + ctrl.put(NODE, 10, "key") + + # has True + assert ctrl._cache_stores[0].has(NODE, "key") + assert ctrl._cache_stores[1].has(NODE, "key") + assert ctrl.has(NODE, "key") + + # get value + assert ctrl._cache_stores[0].get(NODE, "key") == 10 + assert ctrl._cache_stores[1].get(NODE, "key") == 10 + assert ctrl.get(NODE, "key") == 10 + + def test_partial_has_get(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + ctrl.clear() + + # has False + assert not ctrl._cache_stores[0].has(NODE, "key") + assert not ctrl._cache_stores[1].has(NODE, "key") + assert not ctrl.has(NODE, "key") + + # put only in disk + ctrl._cache_stores[1].put(NODE, 10, "key") + + # has + assert not ctrl._cache_stores[0].has(NODE, "key") + assert ctrl._cache_stores[1].has(NODE, "key") + assert ctrl.has(NODE, "key") + + # get + with pytest.raises(CacheException, match="Cache miss"): + ctrl._cache_stores[0].get(NODE, "key") + assert ctrl._cache_stores[1].get(NODE, "key") == 10 + assert ctrl.get(NODE, "key") == 10 + + def test_get_cache_miss(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + ctrl.clear() + + with pytest.raises(CacheException, match="Requested data is not in any cache stores"): + ctrl.get(NODE, "key") + + def test_put_rem(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + # put and check has + ctrl.put(NODE, 10, "key") + assert ctrl.has(NODE, "key") + + # rem other and check has + ctrl.rem(NODE, "other") + assert ctrl.has(NODE, "key") + + # rem and check has + ctrl.rem(NODE, "key") + assert not ctrl.has(NODE, "key") + + def test_rem_wildcard_key(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + # put and check has + ctrl.put(NODE, 10, "key") + assert ctrl.has(NODE, "key") + + # rem other and check has + ctrl.rem(NODE, key="*") + assert not ctrl.has(NODE, "key") + + def test_rem_wildcard_coordinates(self): pass + def test_put_clear(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), 
DiskCacheStore()]) -def test_get_default_cache_ctrl(): - ctrl = get_default_cache_ctrl() + # put and check has + ctrl.put(NODE, 10, "key") + assert ctrl.has(NODE, "key") + + # clear and check has + ctrl.clear() + + # check has + assert not ctrl.has(NODE, "key") + + def test_put_has_mode(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + # put disk and check has + ctrl.clear() + assert not ctrl.has(NODE, "key") + + ctrl.put(NODE, 10, "key", mode="disk") + assert not ctrl._cache_stores[0].has(NODE, "key") + assert not ctrl.has(NODE, "key", mode="ram") + assert ctrl._cache_stores[1].has(NODE, "key") + assert ctrl.has(NODE, "key", mode="disk") + assert ctrl.has(NODE, "key") + + # put ram and check has + ctrl.clear() + assert not ctrl.has(NODE, "key") + + ctrl.put(NODE, 10, "key", mode="ram") + assert ctrl._cache_stores[0].has(NODE, "key") + assert ctrl.has(NODE, "key", mode="ram") + assert not ctrl._cache_stores[1].has(NODE, "key") + assert not ctrl.has(NODE, "key", mode="disk") + assert ctrl.has(NODE, "key") + + def test_invalid_node(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) - assert isinstance(ctrl, CacheCtrl) - assert ctrl._cache_stores == [] + # type + with pytest.raises(TypeError, match="Invalid node"): + ctrl.put("node", 10, "key") + with pytest.raises(TypeError, match="Invalid node"): + ctrl.get("node", "key") + + with pytest.raises(TypeError, match="Invalid node"): + ctrl.has("node", "key") + + with pytest.raises(TypeError, match="Invalid node"): + ctrl.rem("node", "key") + + def test_invalid_key(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + # type + with pytest.raises(TypeError, match="Invalid key"): + ctrl.put(NODE, 10, 10) + + with pytest.raises(TypeError, match="Invalid key"): + ctrl.get(NODE, 10) + + with pytest.raises(TypeError, match="Invalid key"): + ctrl.has(NODE, 10) + + with pytest.raises(TypeError, match="Invalid key"): + ctrl.rem(NODE, 10) + + # wildcard + with pytest.raises(ValueError, match="Invalid key"): + ctrl.put(NODE, 10, "*") + + with pytest.raises(ValueError, match="Invalid key"): + ctrl.get(NODE, "*") + + with pytest.raises(ValueError, match="Invalid key"): + ctrl.has(NODE, "*") + + # allowed + ctrl.rem(NODE, "*") + + def test_invalid_coordinates(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + # type + with pytest.raises(TypeError, match="Invalid coordinates"): + ctrl.put(NODE, 10, "key", coordinates="coords") + + with pytest.raises(TypeError, match="Invalid coordinates"): + ctrl.get(NODE, "key", coordinates="coords") + + with pytest.raises(TypeError, match="Invalid coordinates"): + ctrl.has(NODE, "key", coordinates="coords") + + with pytest.raises(TypeError, match="Invalid coordinates"): + ctrl.rem(NODE, "key", coordinates="coords") + + def test_invalid_mode(self): + ctrl = CacheCtrl(cache_stores=[RamCacheStore(), DiskCacheStore()]) + + with pytest.raises(ValueError, match="Invalid mode"): + ctrl.put(NODE, 10, "key", mode="other") + + with pytest.raises(ValueError, match="Invalid mode"): + ctrl.get(NODE, "key", mode="other") + + with pytest.raises(ValueError, match="Invalid mode"): + ctrl.has(NODE, "key", mode="other") + + with pytest.raises(ValueError, match="Invalid mode"): + ctrl.rem(NODE, "key", mode="other") + + with pytest.raises(ValueError, match="Invalid mode"): + ctrl.clear(mode="other") + + +def test_get_default_cache_ctrl(): with podpac.settings: + podpac.settings["DEFAULT_CACHE"] = [] + ctrl = get_default_cache_ctrl() + 
assert isinstance(ctrl, CacheCtrl) + assert ctrl._cache_stores == [] + podpac.settings["DEFAULT_CACHE"] = ["ram"] ctrl = get_default_cache_ctrl() assert isinstance(ctrl, CacheCtrl) @@ -28,35 +255,47 @@ def test_get_default_cache_ctrl(): assert isinstance(ctrl._cache_stores[0], RamCacheStore) -def test_make_cache_ctrl(): - ctrl = make_cache_ctrl("ram") - assert isinstance(ctrl, CacheCtrl) - assert len(ctrl._cache_stores) == 1 - assert isinstance(ctrl._cache_stores[0], RamCacheStore) +class TestMakeCacheCtrl(object): + def test_str(self): + ctrl = make_cache_ctrl("ram") + assert isinstance(ctrl, CacheCtrl) + assert len(ctrl._cache_stores) == 1 + assert isinstance(ctrl._cache_stores[0], RamCacheStore) - ctrl = make_cache_ctrl("disk") - assert len(ctrl._cache_stores) == 1 - assert isinstance(ctrl._cache_stores[0], DiskCacheStore) + ctrl = make_cache_ctrl("disk") + assert len(ctrl._cache_stores) == 1 + assert isinstance(ctrl._cache_stores[0], DiskCacheStore) - ctrl = make_cache_ctrl(["ram", "disk"]) - assert len(ctrl._cache_stores) == 2 - assert isinstance(ctrl._cache_stores[0], RamCacheStore) - assert isinstance(ctrl._cache_stores[1], DiskCacheStore) + def test_list(self): + ctrl = make_cache_ctrl(["ram", "disk"]) + assert len(ctrl._cache_stores) == 2 + assert isinstance(ctrl._cache_stores[0], RamCacheStore) + assert isinstance(ctrl._cache_stores[1], DiskCacheStore) - with pytest.raises(ValueError, match="Unknown cache store type"): - ctrl = make_cache_ctrl("other") + ctrl = make_cache_ctrl(["ram", "disk"]) + assert len(ctrl._cache_stores) == 2 + assert isinstance(ctrl._cache_stores[0], RamCacheStore) + assert isinstance(ctrl._cache_stores[1], DiskCacheStore) + def test_invalid(self): + with pytest.raises(ValueError, match="Unknown cache store type"): + ctrl = make_cache_ctrl("other") -def test_clear_cache(): - with podpac.settings: - # make a default cache - podpac.settings["DEFAULT_CACHE"] = ["ram"] + with pytest.raises(ValueError, match="Unknown cache store type"): + ctrl = make_cache_ctrl(["other"]) + + +class TestClearCache(object): + def test_clear_cache(self): + with podpac.settings: + # make a default cache + podpac.settings["DEFAULT_CACHE"] = ["ram"] - # fill the default cache - node = podpac.algorithm.Arange() - node.put_cache(0, "mykey") - assert node.has_cache("mykey") + # fill the default cache + node = podpac.algorithm.Arange() + node.put_cache(0, "mykey") + assert node.has_cache("mykey") - clear_cache() + clear_cache() - assert not node.has_cache("mykey") + assert not node.has_cache("mykey") diff --git a/podpac/core/cache/test/test_cache_stores.py b/podpac/core/cache/test/test_cache_stores.py index 5b0b0d13e..79daceac5 100644 --- a/podpac/core/cache/test/test_cache_stores.py +++ b/podpac/core/cache/test/test_cache_stores.py @@ -1,13 +1,13 @@ -import numpy as np import os import shutil import copy +import tempfile import pytest import xarray as xr +import numpy as np import podpac - from podpac.core.cache.utils import CacheException from podpac.core.cache.ram_cache_store import RamCacheStore from podpac.core.cache.disk_cache_store import DiskCacheStore @@ -15,7 +15,7 @@ COORDS1 = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40], ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"]) COORDS2 = podpac.Coordinates([[0, 1, 2], [10, 20, 30]], dims=["lat", "lon"]) -NODE1 = podpac.data.Array(source=np.ones(COORDS1.shape), native_coordinates=COORDS1) +NODE1 = podpac.data.Array(source=np.ones(COORDS1.shape), coordinates=COORDS1) NODE2 = podpac.algorithm.Arange() @@ -87,11 +87,11 @@ 
def test_update(self): # raise exception and do not change with pytest.raises(CacheException, match="Cache entry already exists."): - store.put(NODE1, 10, "mykey1") + store.put(NODE1, 10, "mykey1", update=False) assert store.get(NODE1, "mykey1") == 10 # update - store.put(NODE1, 20, "mykey1", update=True) + store.put(NODE1, 20, "mykey1") assert store.get(NODE1, "mykey1") == 20 def test_get_put_none(self): @@ -314,36 +314,40 @@ class TestDiskCacheStore(FileCacheStoreTests): Store = DiskCacheStore enabled_setting = "DISK_CACHE_ENABLED" limit_setting = "DISK_CACHE_MAX_BYTES" - cache_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "tmp_cache")) def setup_method(self): super(TestDiskCacheStore, self).setup_method() - podpac.settings["DISK_CACHE_DIR"] = self.cache_dir - assert not os.path.exists(self.cache_dir) + self.test_cache_dir = tempfile.mkdtemp(prefix="podpac-test-") + podpac.settings["DISK_CACHE_DIR"] = self.test_cache_dir def teardown_method(self): super(TestDiskCacheStore, self).teardown_method() - shutil.rmtree(self.cache_dir, ignore_errors=True) + shutil.rmtree(self.test_cache_dir, ignore_errors=True) def test_cache_dir(self): - # absolute path - podpac.settings["DISK_CACHE_DIR"] = self.cache_dir - expected = self.cache_dir - store = DiskCacheStore() - store.put(NODE1, 10, "mykey1") - assert store.find(NODE1, "mykey1").startswith(expected) - store.clear() + with podpac.settings: - # relative path - podpac.settings["DISK_CACHE_DIR"] = "_testcache_" - expected = os.path.join(podpac.settings["ROOT_PATH"], "_testcache_") - store = DiskCacheStore() - store.clear() - store.put(NODE1, 10, "mykey1") - assert store.find(NODE1, "mykey1").startswith(expected) - store.clear() + # absolute path + podpac.settings["DISK_CACHE_DIR"] = self.test_cache_dir + expected = self.test_cache_dir + store = DiskCacheStore() + store.put(NODE1, 10, "mykey1") + assert store.find(NODE1, "mykey1").startswith(expected) + store.clear() + + # relative path + podpac.settings["DISK_CACHE_DIR"] = "_testcache_" + expected = os.path.join( + os.environ.get("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".config", "podpac")), + "_testcache_", + ) + store = DiskCacheStore() + store.clear() + store.put(NODE1, 10, "mykey1") + assert store.find(NODE1, "mykey1").startswith(expected) + store.clear() def test_size(self): store = self.Store() @@ -363,17 +367,17 @@ class TestS3CacheStore(FileCacheStoreTests): Store = S3CacheStore enabled_setting = "S3_CACHE_ENABLED" limit_setting = "S3_CACHE_MAX_BYTES" - cache_dir = "tmp_cache" + test_cache_dir = "tmp_cache" def setup_method(self): super(TestS3CacheStore, self).setup_method() - podpac.settings["S3_CACHE_DIR"] = self.cache_dir + podpac.settings["S3_CACHE_DIR"] = self.test_cache_dir def teardown_method(self): try: store = S3CacheStore() - store._rmtree(self.cache_dir) + store._rmtree(self.test_cache_dir) except: pass diff --git a/podpac/core/cache/test/tmp_cache/podpac/core/node/Test/Test--podpac-cached-property-blue_242a08f6c2e12f7113898cea3ab5003f_64b03983688fcfb5e0e653280ea0b679_None.json b/podpac/core/cache/test/tmp_cache/podpac/core/node/Test/Test--podpac-cached-property-blue_242a08f6c2e12f7113898cea3ab5003f_64b03983688fcfb5e0e653280ea0b679_None.json new file mode 100644 index 000000000..e68122571 --- /dev/null +++ b/podpac/core/cache/test/tmp_cache/podpac/core/node/Test/Test--podpac-cached-property-blue_242a08f6c2e12f7113898cea3ab5003f_64b03983688fcfb5e0e653280ea0b679_None.json @@ -0,0 +1 @@ +"blue" \ No newline at end of file diff --git 
a/podpac/core/cache/test/tmp_cache/wcs_temp.tiff b/podpac/core/cache/test/tmp_cache/wcs_temp.tiff new file mode 100644 index 000000000..9396ba2d1 Binary files /dev/null and b/podpac/core/cache/test/tmp_cache/wcs_temp.tiff differ diff --git a/podpac/core/compositor.py b/podpac/core/compositor.py deleted file mode 100644 index 0cc51c971..000000000 --- a/podpac/core/compositor.py +++ /dev/null @@ -1,388 +0,0 @@ -""" -Compositor Summary -""" - - -from __future__ import division, unicode_literals, print_function, absolute_import - -import copy - -import numpy as np -import traitlets as tl - -# Internal imports -from podpac.core.settings import settings -from podpac.core.coordinates import Coordinates, merge_dims -from podpac.core.utils import common_doc, ArrayTrait, trait_is_defined -from podpac.core.units import UnitsDataArray -from podpac.core.node import COMMON_NODE_DOC, node_eval, Node -from podpac.core.data.datasource import COMMON_DATA_DOC -from podpac.core.data.interpolation import interpolation_trait -from podpac.core.managers.multi_threading import thread_manager - -COMMON_COMPOSITOR_DOC = COMMON_DATA_DOC.copy() # superset of COMMON_NODE_DOC - - -@common_doc(COMMON_COMPOSITOR_DOC) -class Compositor(Node): - """Compositor - - Attributes - ---------- - cache_native_coordinates : Bool - Default is True. If native_coordinates are requested by the user, it may take a long time to calculate if the - Compositor points to many sources. The result is relatively small and is cached by default. Caching may not be - desired if the datasource change or is updated. - interpolation : str, dict, optional - {interpolation} - is_source_coordinates_complete : Bool - Default is False. The source_coordinates do not have to completely describe the source. For example, the source - coordinates could include the year-month-day of the source, but the actual source also has hour-minute-second - information. In that case, source_coordinates is incomplete. This flag is used to automatically construct - native_coordinates. - shared_coordinates : :class:`podpac.Coordinates`, optional - Coordinates that are shared amongst all of the composited sources - source : str - The source is used for a unique name to cache composited products. - source_coordinates : :class:`podpac.Coordinates` - Coordinates that make each source unique. Much be single-dimensional the same size as ``sources``. Optional. - sources : :class:`np.ndarray` - An array of sources. This is a numpy array as opposed to a list so that boolean indexing may be used to - subselect the nodes that will be evaluated. - source_coordinates : :class:`podpac.Coordinates`, optional - Coordinates that make each source unique. This is used for subsetting which sources to evaluate based on the - user-requested coordinates. It is an optimization. - strict_source_outputs : bool - Default is False. When compositing multi-output sources, combine the outputs from all sources. If True, do not - allow sources with different outputs (an exception will be raised if the sources contain different outputs). - - Notes - ----- - Developers of new Compositor nodes need to implement the `composite` method. - - Multitheading:: - * When MULTITHREADING is False, the compositor stops evaluated sources once the output is completely filled. - * When MULTITHREADING is True, the compositor must evaluate every source. - The result is the same, but note that because of this, disabling multithreading could sometimes be faster, - especially if the number of threads is low. 
- * NASA data servers seem to have a hard limit of 10 simultaneous requests, so a max of 10 threads is recommend - for most use-cases. - """ - - shared_coordinates = tl.Instance(Coordinates, allow_none=True) - source_coordinates = tl.Instance(Coordinates, allow_none=True) - is_source_coordinates_complete = tl.Bool( - False, - help=( - "This allows some optimizations but assumes that a node's " - "native_coordinates=source_coordinate + shared_coordinate " - "IN THAT ORDER" - ), - ) - - sources = ArrayTrait(ndim=1) - cache_native_coordinates = tl.Bool(True) - interpolation = interpolation_trait(default_value=None) - strict_source_outputs = tl.Bool(False) - - @tl.default("source_coordinates") - def _source_coordinates_default(self): - return self.get_source_coordinates() - - @tl.validate("sources") - def _validate_sources(self, d): - self.outputs # check for consistent outputs - return np.array([copy.deepcopy(source) for source in d["value"]]) - - @tl.default("outputs") - def _default_outputs(self): - if all(source.outputs is None for source in self.sources): - return None - - elif all(source.outputs is not None and source.output is None for source in self.sources): - if self.strict_source_outputs: - outputs = self.sources[0].outputs - if any(source.outputs != outputs for source in self.sources): - raise ValueError( - "Source outputs mismatch, and strict_source_outputs is True. " - "The sources must all contain the same outputs if strict_source_outputs is True. " - ) - return outputs - else: - outputs = [] - for source in self.sources: - for output in source.outputs: - if output not in outputs: - outputs.append(output) - if len(outputs) == 0: - outputs = None - return outputs - - else: - raise ValueError( - "Cannot composite standard sources with multi-output sources. " - "The sources must all be stardard single-output nodes or all multi-output nodes." - ) - - @tl.validate("source_coordinates") - def _validate_source_coordinates(self, d): - if d["value"] is not None: - if d["value"].ndim != 1: - raise ValueError("Invalid source_coordinates, invalid ndim (%d != 1)" % d["value"].ndim) - - if d["value"].size != self.sources.size: - raise ValueError( - "Invalid source_coordinates, source and source_coordinates size mismatch (%d != %d)" - % (d["value"].size, self.sources.size) - ) - - return d["value"] - - # default representation - def __repr__(self): - source_name = str(self.__class__.__name__) - - rep = "{}".format(source_name) - rep += "\n\tsource: {}".format("_".join(str(source) for source in self.sources[:3])) - rep += "\n\tinterpolation: {}".format(self.interpolation) - - return rep - - def get_source_coordinates(self): - """ - Returns the coordinates describing each source. - This may be implemented by derived classes, and is an optimization that allows evaluation subsets of source. - - Returns - ------- - :class:`podpac.Coordinates` - Coordinates describing each source. - """ - return None - - @tl.default("shared_coordinates") - def _shared_coordinates_default(self): - return self.get_shared_coordinates() - - def get_shared_coordinates(self): - """Coordinates shared by each source. - - Raises - ------ - NotImplementedError - Description - """ - raise NotImplementedError() - - def select_sources(self, coordinates): - """Downselect compositor sources based on requested coordinates. - - This is used during the :meth:`eval` process as an optimization - when :attr:`source_coordinates` are not pre-defined. 
- - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - Coordinates to evaluate at compositor sources - - Returns - ------- - :class:`np.ndarray` - Array of downselected sources - """ - - # if source coordinates are defined, use intersect - if self.source_coordinates is not None: - # intersecting sources only - try: - _, I = self.source_coordinates.intersect(coordinates, outer=True, return_indices=True) - - except: # Likely non-monotonic coordinates - _, I = self.source_coordinates.intersect(coordinates, outer=False, return_indices=True) - i = I[0] - src_subset = self.sources[i] - - # no downselection possible - get all sources compositor - else: - src_subset = self.sources - - return src_subset - - def composite(self, coordinates, outputs, result=None): - """Implements the rules for compositing multiple sources together. - - Parameters - ---------- - outputs : list - A list of outputs that need to be composited together - result : UnitDataArray, optional - An optional pre-filled array may be supplied, otherwise the output will be allocated. - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError() - - def iteroutputs(self, coordinates): - """Summary - - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - Coordinates to evaluate at compositor sources - - Yields - ------ - :class:`podpac.core.units.UnitsDataArray` - Output from source node eval method - """ - # downselect sources based on coordinates - src_subset = self.select_sources(coordinates) - - if len(src_subset) == 0: - yield self.create_output_array(coordinates) - return - - # Set the interpolation properties for sources - if self.interpolation is not None: - for s in src_subset.ravel(): - if trait_is_defined(self, "interpolation"): - s.set_trait("interpolation", self.interpolation) - - # Optimization: if coordinates complete and source coords is 1D, - # set native_coordinates unless they are set already - # WARNING: this assumes - # native_coords = source_coords + shared_coordinates - # NOT native_coords = shared_coords + source_coords - if self.is_source_coordinates_complete and self.source_coordinates.ndim == 1: - coords_subset = list(self.source_coordinates.intersect(coordinates, outer=True).coords.values())[0] - coords_dim = list(self.source_coordinates.dims)[0] - for s, c in zip(src_subset, coords_subset): - nc = merge_dims([Coordinates(np.atleast_1d(c), dims=[coords_dim]), self.shared_coordinates]) - - if trait_is_defined(s, "native_coordinates") is False: - s.set_trait("native_coordinates", nc) - - if settings["MULTITHREADING"]: - n_threads = thread_manager.request_n_threads(len(src_subset)) - if n_threads == 1: - thread_manager.release_n_threads(n_threads) - else: - n_threads = 0 - - if settings["MULTITHREADING"] and n_threads > 1: - # evaluate nodes in parallel using thread pool - self._multi_threaded = True - pool = thread_manager.get_thread_pool(processes=n_threads) - outputs = pool.map(lambda src: src.eval(coordinates), src_subset) - pool.close() - thread_manager.release_n_threads(n_threads) - for output in outputs: - yield output - - else: - # evaluate nodes serially - self._multi_threaded = False - for src in src_subset: - yield src.eval(coordinates) - - @node_eval - @common_doc(COMMON_COMPOSITOR_DOC) - def eval(self, coordinates, output=None): - """Evaluates this nodes using the supplied coordinates. 
- - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - {requested_coordinates} - output : podpac.UnitsDataArray, optional - {eval_output} - - Returns - ------- - {eval_return} - """ - - self._requested_coordinates = coordinates - - outputs = self.iteroutputs(coordinates) - output = self.composite(coordinates, outputs, output) - return output - - def find_coordinates(self): - """ - Get the available native coordinates for the Node. - - Returns - ------- - coords_list : list - list of available coordinates (Coordinate objects) - """ - - raise NotImplementedError("TODO") - - @property - @common_doc(COMMON_COMPOSITOR_DOC) - def base_definition(self): - """Base node defintion for Compositor nodes. - - Returns - ------- - {definition_return} - """ - d = super(Compositor, self).base_definition - d["sources"] = self.sources - d["interpolation"] = self.interpolation - return d - - -class OrderedCompositor(Compositor): - """Compositor that combines sources based on their order in self.sources. Once a request contains no - nans, the result is returned. - """ - - @common_doc(COMMON_COMPOSITOR_DOC) - def composite(self, coordinates, data_arrays, result=None): - """Composites data_arrays in order that they appear. - - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - {requested_coordinates} - data_arrays : generator - Generator that gives UnitDataArray's with the source values. - result : podpac.UnitsDataArray, optional - {eval_output} - - Returns - ------- - {eval_return} This composites the sources together until there are no nans or no more sources. - """ - - if result is None: - result = self.create_output_array(coordinates) - else: - result[:] = np.nan - - mask = UnitsDataArray.create(coordinates, outputs=self.outputs, data=0, dtype=bool) - for data in data_arrays: - if self.outputs is None: - data = data.transpose(*result.dims) - self._composite(result, data, mask) - else: - for name in data["output"]: - self._composite(result.sel(output=name), data.sel(output=name), mask.sel(output=name)) - - # stop if the results are full - if np.all(mask): - break - - return result - - @staticmethod - def _composite(result, data, mask): - source_mask = np.isfinite(data.data) - b = ~mask & source_mask - result.data[b.data] = data.data[b.data] - mask |= source_mask diff --git a/podpac/core/compositor/__init__.py b/podpac/core/compositor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/podpac/core/compositor/compositor.py b/podpac/core/compositor/compositor.py new file mode 100644 index 000000000..8c25eb10a --- /dev/null +++ b/podpac/core/compositor/compositor.py @@ -0,0 +1,262 @@ +""" +Compositor Summary +""" + + +from __future__ import division, unicode_literals, print_function, absolute_import + +import copy + +import numpy as np +import traitlets as tl + +# Internal imports +from podpac.core.settings import settings +from podpac.core.coordinates import Coordinates +from podpac.core.utils import common_doc, NodeTrait +from podpac.core.node import COMMON_NODE_DOC, node_eval, Node +from podpac.core.data.datasource import COMMON_DATA_DOC +from podpac.core.interpolation.interpolation import InterpolationTrait +from podpac.core.managers.multi_threading import thread_manager + +COMMON_COMPOSITOR_DOC = COMMON_DATA_DOC.copy() # superset of COMMON_NODE_DOC + + +@common_doc(COMMON_COMPOSITOR_DOC) +class BaseCompositor(Node): + """A base class for compositor nodes. + + Attributes + ---------- + sources : list + Source nodes. 
+ source_coordinates : :class:`podpac.Coordinates` + Coordinates that make each source unique. Must the same size as ``sources`` and single-dimensional. Optional. + interpolation : str, dict, optional + {interpolation} + + Notes + ----- + Developers of compositor subclasses nodes need to implement the `composite` method. + + Multitheading:: + * When MULTITHREADING is False, the compositor stops evaluated sources once the output is completely filled. + * When MULTITHREADING is True, the compositor must evaluate every source. + The result is the same, but note that because of this, disabling multithreading could sometimes be faster, + especially if the number of threads is low. + * NASA data servers seem to have a hard limit of 10 simultaneous requests, so a max of 10 threads is recommend + for most use-cases. + """ + + sources = tl.List(trait=NodeTrait()).tag(attr=True) + interpolation = InterpolationTrait(allow_none=True, default_value=None).tag(attr=True) + source_coordinates = tl.Instance(Coordinates, allow_none=True, default_value=None).tag(attr=True) + + auto_outputs = tl.Bool(False) + + # debug traits + _eval_sources = tl.Any() + + @tl.validate("sources") + def _validate_sources(self, d): + sources = d["value"] + + n = np.sum([source.outputs is None for source in sources]) + if not (n == 0 or n == len(sources)): + raise ValueError( + "Cannot composite standard sources with multi-output sources. " + "The sources must all be standard single-output nodes or all multi-output nodes." + ) + + # copy so that interpolation trait of the input source is not overwritten + return [copy.deepcopy(source) for source in sources] + + @tl.validate("source_coordinates") + def _validate_source_coordinates(self, d): + if d["value"] is None: + return None + + if d["value"].ndim != 1: + raise ValueError("Invalid source_coordinates, invalid ndim (%d != 1)" % d["value"].ndim) + + if d["value"].size != len(self.sources): + raise ValueError( + "Invalid source_coordinates, source and source_coordinates size mismatch (%d != %d)" + % (d["value"].size, len(self.sources)) + ) + + return d["value"] + + @tl.default("outputs") + def _default_outputs(self): + if not self.auto_outputs: + return None + + # autodetect outputs from sources + if all(source.outputs is None for source in self.sources): + outputs = None + + elif all(source.outputs is not None and source.output is None for source in self.sources): + outputs = [] + for source in self.sources: + for output in source.outputs: + if output not in outputs: + outputs.append(output) + + if len(outputs) == 0: + outputs = None + + else: + raise RuntimeError( + "Compositor sources were not validated correctly. " + "Cannot composite standard sources with multi-output sources." + ) + + return outputs + + def select_sources(self, coordinates): + """Select and prepare sources based on requested coordinates. + + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + Coordinates to evaluate at compositor sources + + Returns + ------- + sources : :class:`np.ndarray` + Array of sources + + Notes + ----- + * If :attr:`source_coordinates` is defined, only sources that intersect the requested coordinates are selected. + * Sets sources :attr:`interpolation`. 
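Editor's note: to make the new selection behavior concrete, here is a small sketch along the lines of the compositor tests at the end of this diff. It only exercises `select_sources` (`BaseCompositor` has no `composite` implementation of its own); the final block shows the `MULTITHREADING` settings keys the notes above refer to. The `Arange`/`SinCoords` sources are just convenient placeholders.

```python
# Sketch: source downselection via source_coordinates (mirrors the tests below).
import podpac
from podpac.core.compositor.compositor import BaseCompositor

# one source per time slice; source_coordinates has exactly one entry per source
node = BaseCompositor(
    sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()],
    source_coordinates=podpac.Coordinates([[0, 10]], dims=["time"]),
)

# a request at time=10 intersects only the second source
request = podpac.Coordinates(
    [podpac.clinspace(0, 1, 5), podpac.clinspace(0, 1, 5), 10], dims=["lat", "lon", "time"]
)
selected = node.select_sources(request)
assert len(selected) == 1

# the MULTITHREADING settings the notes above refer to (same keys as in the tests below)
with podpac.settings:
    podpac.settings["MULTITHREADING"] = True
    podpac.settings["N_THREADS"] = 8
    # a concrete compositor evaluated here would fan its sources out across threads;
    # with MULTITHREADING = False it stops as soon as the output is completely filled
```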
+ """ + + # select intersecting sources, if possible + if self.source_coordinates is None: + sources = self.sources + else: + try: + _, I = self.source_coordinates.intersect(coordinates, outer=True, return_indices=True) + except: + # Likely non-monotonic coordinates + _, I = self.source_coordinates.intersect(coordinates, outer=False, return_indices=True) + i = I[0] + sources = np.array(self.sources)[i].tolist() + + # set the interpolation properties for sources + if self.trait_is_defined("interpolation") and self.interpolation is not None: + for s in sources: + if s.has_trait("interpolation"): + s.set_trait("interpolation", self.interpolation) + + return sources + + def composite(self, coordinates, data_arrays, result=None): + """Implements the rules for compositing multiple sources together. Must be implemented by child classes. + + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + {requested_coordinates} + data_arrays : list + Evaluated data from the sources. + result : UnitDataArray, optional + An optional pre-filled array may be supplied, otherwise the output will be allocated. + + Returns + ------- + {eval_return} + """ + + raise NotImplementedError() + + def iteroutputs(self, coordinates): + """Summary + + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + Coordinates to evaluate at compositor sources + + Yields + ------ + :class:`podpac.core.units.UnitsDataArray` + Output from source node eval method + """ + + # get sources, potentially downselected + sources = self.select_sources(coordinates) + + if settings["DEBUG"]: + self._eval_sources = sources + + if len(sources) == 0: + yield self.create_output_array(coordinates) + return + + if settings["MULTITHREADING"]: + n_threads = thread_manager.request_n_threads(len(sources)) + if n_threads == 1: + thread_manager.release_n_threads(n_threads) + else: + n_threads = 0 + + if settings["MULTITHREADING"] and n_threads > 1: + # evaluate nodes in parallel using thread pool + self._multi_threaded = True + pool = thread_manager.get_thread_pool(processes=n_threads) + outputs = pool.map(lambda src: src.eval(coordinates), sources) + pool.close() + thread_manager.release_n_threads(n_threads) + for output in outputs: + yield output + + else: + # evaluate nodes serially + self._multi_threaded = False + for src in sources: + yield src.eval(coordinates) + + @node_eval + @common_doc(COMMON_COMPOSITOR_DOC) + def eval(self, coordinates, output=None): + """Evaluates this nodes using the supplied coordinates. + + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + {requested_coordinates} + output : podpac.UnitsDataArray, optional + {eval_output} + + Returns + ------- + {eval_return} + """ + + self._requested_coordinates = coordinates + outputs = self.iteroutputs(coordinates) + output = self.composite(coordinates, outputs, output) + return output + + def find_coordinates(self): + """ + Get the available coordinates for the Node. + + Returns + ------- + coords_list : list + available coordinates from all of the sources. 
+ """ + + return [coords for source in self.sources for coords in source.find_coordinates()] + + @property + def _repr_keys(self): + """list of attribute names, used by __repr__ and __str__ to display minimal info about the node""" + keys = [] + if self.trait_is_defined("sources"): + keys.append("sources") + return keys diff --git a/podpac/core/compositor/ordered_compositor.py b/podpac/core/compositor/ordered_compositor.py new file mode 100644 index 000000000..83449a764 --- /dev/null +++ b/podpac/core/compositor/ordered_compositor.py @@ -0,0 +1,70 @@ +from __future__ import division, unicode_literals, print_function, absolute_import + +import numpy as np + +# Internal imports +from podpac.core.units import UnitsDataArray +from podpac.core.utils import common_doc +from podpac.core.compositor.compositor import COMMON_COMPOSITOR_DOC, BaseCompositor + + +@common_doc(COMMON_COMPOSITOR_DOC) +class OrderedCompositor(BaseCompositor): + """Compositor that combines sources based on their order in self.sources. + + The requested data is interpolated by the sources before being composited. + + Attributes + ---------- + sources : list + Source nodes, in order of preference. Later sources are only used where earlier sources do not provide data. + source_coordinates : :class:`podpac.Coordinates` + Coordinates that make each source unique. Must the same size as ``sources`` and single-dimensional. Optional. + interpolation : str, dict, optional + {interpolation} + """ + + @common_doc(COMMON_COMPOSITOR_DOC) + def composite(self, coordinates, data_arrays, result=None): + """Composites data_arrays in order that they appear. Once a request contains no nans, the result is returned. + + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + {requested_coordinates} + data_arrays : generator + Evaluated source data, in the same order as the sources. + result : podpac.UnitsDataArray, optional + {eval_output} + + Returns + ------- + {eval_return} This composites the sources together until there are no nans or no more sources. 
+ """ + + if result is None: + result = self.create_output_array(coordinates) + else: + result[:] = np.nan + + mask = UnitsDataArray.create(coordinates, outputs=self.outputs, data=0, dtype=bool) + for data in data_arrays: + if self.outputs is None: + data = data.transpose(*result.dims) + self._composite(result, data, mask) + else: + for name in data["output"]: + self._composite(result.sel(output=name), data.sel(output=name), mask.sel(output=name)) + + # stop if the results are full + if np.all(mask): + break + + return result + + @staticmethod + def _composite(result, data, mask): + source_mask = np.isfinite(data.data) + b = ~mask & source_mask + result.data[b.data] = data.data[b.data] + mask |= source_mask diff --git a/podpac/core/compositor/test/test_base_compositor.py b/podpac/core/compositor/test/test_base_compositor.py new file mode 100644 index 000000000..385999450 --- /dev/null +++ b/podpac/core/compositor/test/test_base_compositor.py @@ -0,0 +1,243 @@ +import pytest +import numpy as np + +import podpac +from podpac.core.data.datasource import DataSource +from podpac.core.data.array_source import Array +from podpac.core.compositor.compositor import BaseCompositor + +COORDS = podpac.Coordinates( + [podpac.clinspace(45, 0, 16), podpac.clinspace(-70, -65, 16), podpac.clinspace(0, 1, 2)], + dims=["lat", "lon", "time"], +) +LON, LAT, TIME = np.meshgrid(COORDS["lon"].coordinates, COORDS["lat"].coordinates, COORDS["time"].coordinates) + +ARRAY_LAT = Array(source=LAT.astype(float), coordinates=COORDS, interpolation="bilinear") +ARRAY_LON = Array(source=LON.astype(float), coordinates=COORDS, interpolation="bilinear") +ARRAY_TIME = Array(source=TIME.astype(float), coordinates=COORDS, interpolation="bilinear") + +MULTI_0_XY = Array(source=np.full(COORDS.shape + (2,), 0), coordinates=COORDS, outputs=["x", "y"]) +MULTI_1_XY = Array(source=np.full(COORDS.shape + (2,), 1), coordinates=COORDS, outputs=["x", "y"]) +MULTI_4_YX = Array(source=np.full(COORDS.shape + (2,), 4), coordinates=COORDS, outputs=["y", "x"]) +MULTI_2_X = Array(source=np.full(COORDS.shape + (1,), 2), coordinates=COORDS, outputs=["x"]) +MULTI_3_Z = Array(source=np.full(COORDS.shape + (1,), 3), coordinates=COORDS, outputs=["z"]) + + +class TestBaseCompositor(object): + def test_init(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + repr(node) + + def test_source_coordinates(self): + # none (default) + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + assert node.source_coordinates is None + + # unstacked + node = BaseCompositor( + sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], + source_coordinates=podpac.Coordinates([[0, 1]], dims=["time"]), + ) + + # stacked + node = BaseCompositor( + sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], + source_coordinates=podpac.Coordinates([[[0, 1], [10, 20]]], dims=["time_alt"]), + ) + + # invalid size + with pytest.raises(ValueError, match="Invalid source_coordinates, source and source_coordinates size mismatch"): + node = BaseCompositor( + sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], + source_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["time"]), + ) + + with pytest.raises(ValueError, match="Invalid source_coordinates, source and source_coordinates size mismatch"): + node = BaseCompositor( + sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], + source_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["time"]), + ) + + # invalid ndims + with pytest.raises(ValueError, 
match="Invalid source_coordinates"): + node = BaseCompositor( + sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], + source_coordinates=podpac.Coordinates([[0, 1], [10, 20]], dims=["time", "alt"]), + ) + + def test_select_sources_default(self): + node = BaseCompositor( + sources=[DataSource(), DataSource(interpolation="nearest_preview"), podpac.algorithm.Arange()], + interpolation="bilinear", + ) + sources = node.select_sources(podpac.Coordinates([[0, 10]], ["time"])) + + assert isinstance(sources, list) + assert len(sources) == 3 + + def test_select_sources_intersection(self): + source_coords = podpac.Coordinates([[0, 10]], ["time"]) + node = BaseCompositor(sources=[DataSource(), DataSource()], source_coordinates=source_coords) + + # select all + selected = node.select_sources(source_coords) + assert len(selected) == 2 + assert selected[0] == node.sources[0] + assert selected[1] == node.sources[1] + + # select first + c = podpac.Coordinates([podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 0], ["lat", "lon", "time"]) + selected = node.select_sources(c) + assert len(selected) == 1 + assert selected[0] == node.sources[0] + + # select second + c = podpac.Coordinates([podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 10], ["lat", "lon", "time"]) + selected = node.select_sources(c) + assert len(selected) == 1 + assert selected[0] == node.sources[1] + + # select none + c = podpac.Coordinates([podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 100], ["lat", "lon", "time"]) + selected = node.select_sources(c) + assert len(selected) == 0 + + def test_select_sources_set_interpolation(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME], interpolation="nearest") + sources = node.select_sources(COORDS) + assert sources[0].interpolation == "nearest" + assert sources[1].interpolation == "nearest" + assert sources[2].interpolation == "nearest" + assert ARRAY_LAT.interpolation == "bilinear" + assert ARRAY_LON.interpolation == "bilinear" + assert ARRAY_TIME.interpolation == "bilinear" + + # if no interpolation is provided, keep the source interpolation values + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + sources = node.select_sources(COORDS) + assert node.sources[0].interpolation == "bilinear" + assert node.sources[1].interpolation == "bilinear" + assert node.sources[2].interpolation == "bilinear" + + def test_iteroutputs_empty(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + outputs = node.iteroutputs(podpac.Coordinates([-1, -1, -1], dims=["lat", "lon", "time"])) + np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) + np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) + np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) + with pytest.raises(StopIteration): + next(outputs) + + def test_iteroutputs_singlethreaded(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = False + + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + outputs = node.iteroutputs(COORDS) + np.testing.assert_array_equal(next(outputs), LAT) + np.testing.assert_array_equal(next(outputs), LON) + np.testing.assert_array_equal(next(outputs), TIME) + with pytest.raises(StopIteration): + next(outputs) + assert node._multi_threaded == False + + def test_iteroutputs_multithreaded(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = True + podpac.settings["N_THREADS"] = 8 + + n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used + node = 
BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + outputs = node.iteroutputs(COORDS) + np.testing.assert_array_equal(next(outputs), LAT) + np.testing.assert_array_equal(next(outputs), LON) + np.testing.assert_array_equal(next(outputs), TIME) + with pytest.raises(StopIteration): + next(outputs) + assert node._multi_threaded == True + assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before + + def test_iteroutputs_n_threads_1(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = True + podpac.settings["N_THREADS"] = 1 + + n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + outputs = node.iteroutputs(COORDS) + np.testing.assert_array_equal(next(outputs), LAT) + np.testing.assert_array_equal(next(outputs), LON) + np.testing.assert_array_equal(next(outputs), TIME) + with pytest.raises(StopIteration): + next(outputs) + assert node._multi_threaded == False + assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before + + def test_composite(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + with pytest.raises(NotImplementedError): + node.composite(COORDS, iter(())) + + def test_eval(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + with pytest.raises(NotImplementedError): + node.eval(COORDS) + + class MockComposite(BaseCompositor): + def composite(self, coordinates, outputs, result=None): + return next(outputs) + + node = MockComposite(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + output = node.eval(COORDS) + np.testing.assert_array_equal(output, LAT) + + def test_find_coordinates(self): + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + + coord_list = node.find_coordinates() + assert isinstance(coord_list, list) + assert len(coord_list) == 3 + + def test_outputs(self): + # standard single-output + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) + assert node.outputs is None + + # even if the sources have multiple outputs, the default here is outputs + node = BaseCompositor(sources=[MULTI_0_XY, MULTI_1_XY]) + assert node.outputs is None + + def test_auto_outputs(self): + # autodetect single-output + node = BaseCompositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME], auto_outputs=True) + assert node.outputs is None + + # autodetect multi-output + node = BaseCompositor(sources=[MULTI_0_XY, MULTI_1_XY], auto_outputs=True) + assert node.outputs == ["x", "y"] + + node = BaseCompositor(sources=[MULTI_0_XY, MULTI_3_Z], auto_outputs=True) + assert node.outputs == ["x", "y", "z"] + + node = BaseCompositor(sources=[MULTI_3_Z, MULTI_0_XY], auto_outputs=True) + assert node.outputs == ["z", "x", "y"] + + node = BaseCompositor(sources=[MULTI_0_XY, MULTI_4_YX], auto_outputs=True) + assert node.outputs == ["x", "y"] + + # mixed + with pytest.raises(ValueError, match="Cannot composite standard sources with multi-output sources."): + node = BaseCompositor(sources=[MULTI_2_X, ARRAY_LAT], auto_outputs=True) + + # no sources + node = BaseCompositor(sources=[], auto_outputs=True) + assert node.outputs is None + + def test_forced_invalid_sources(self): + class MyCompositor(BaseCompositor): + sources = [MULTI_2_X, ARRAY_LAT] + auto_outputs = True + + node = MyCompositor() + with pytest.raises(RuntimeError, match="Compositor sources were not validated correctly"): + node.outputs diff --git 
a/podpac/core/compositor/test/test_ordered_compositor.py b/podpac/core/compositor/test/test_ordered_compositor.py new file mode 100644 index 000000000..002e3f931 --- /dev/null +++ b/podpac/core/compositor/test/test_ordered_compositor.py @@ -0,0 +1,168 @@ +import numpy as np + +import podpac +from podpac.core.data.array_source import Array +from podpac.core.compositor.ordered_compositor import OrderedCompositor + +COORDS = podpac.Coordinates( + [podpac.clinspace(45, 0, 16), podpac.clinspace(-70, -65, 16), podpac.clinspace(0, 1, 2)], + dims=["lat", "lon", "time"], +) + +MULTI_0_XY = Array(source=np.full(COORDS.shape + (2,), 0), coordinates=COORDS, outputs=["x", "y"]) +MULTI_1_XY = Array(source=np.full(COORDS.shape + (2,), 1), coordinates=COORDS, outputs=["x", "y"]) +MULTI_4_YX = Array(source=np.full(COORDS.shape + (2,), 4), coordinates=COORDS, outputs=["y", "x"]) +MULTI_2_X = Array(source=np.full(COORDS.shape + (1,), 2), coordinates=COORDS, outputs=["x"]) +MULTI_3_Z = Array(source=np.full(COORDS.shape + (1,), 3), coordinates=COORDS, outputs=["z"]) + + +class TestOrderedCompositor(object): + def test_composite(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = False + + acoords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) + asource = np.ones(acoords.shape) + asource[0, :] = np.nan + a = Array(source=asource, coordinates=acoords) + + bcoords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) + bsource = np.zeros(bcoords.shape) + bsource[:, 0] = np.nan + b = Array(source=bsource, coordinates=bcoords) + + coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40, 50]], dims=["lat", "lon"]) + + node = OrderedCompositor(sources=[a, b], interpolation="bilinear") + expected = np.array( + [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 1.0, 1.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] + ) + np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) + + node = OrderedCompositor(sources=[b, a], interpolation="bilinear") + expected = np.array( + [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 0.0, 0.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] + ) + np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) + + def test_composite_multithreaded(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = True + podpac.settings["N_THREADS"] = 8 + + acoords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) + asource = np.ones(acoords.shape) + asource[0, :] = np.nan + a = Array(source=asource, coordinates=acoords) + + bcoords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) + bsource = np.zeros(bcoords.shape) + bsource[:, 0] = np.nan + b = Array(source=bsource, coordinates=bcoords) + + coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40, 50]], dims=["lat", "lon"]) + + node = OrderedCompositor(sources=[a, b], interpolation="bilinear") + expected = np.array( + [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 1.0, 1.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] + ) + np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) + + node = OrderedCompositor(sources=[b, a], interpolation="bilinear") + expected = np.array( + [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 0.0, 0.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] + ) + np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) + + def test_composite_short_circuit(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = False + podpac.settings["DEBUG"] = True + + coords = podpac.Coordinates([[0, 
1], [10, 20, 30]], dims=["lat", "lon"]) + a = Array(source=np.ones(coords.shape), coordinates=coords) + b = Array(source=np.zeros(coords.shape), coordinates=coords) + node = OrderedCompositor(sources=[a, b], interpolation="bilinear") + output = node.eval(coords) + np.testing.assert_array_equal(output, a.source) + assert node._eval_sources[0]._output is not None + assert node._eval_sources[1]._output is None + + def test_composite_short_circuit_multithreaded(self): + with podpac.settings: + podpac.settings["MULTITHREADING"] = True + podpac.settings["N_THREADS"] = 8 + podpac.settings["DEBUG"] = True + + coords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) + n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used + a = Array(source=np.ones(coords.shape), coordinates=coords) + b = Array(source=np.zeros(coords.shape), coordinates=coords) + node = OrderedCompositor(sources=[a, b], interpolation="bilinear") + output = node.eval(coords) + np.testing.assert_array_equal(output, a.source) + assert node._multi_threaded == True + assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before + + def test_composite_into_result(self): + coords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) + a = Array(source=np.ones(coords.shape), coordinates=coords) + b = Array(source=np.zeros(coords.shape), coordinates=coords) + node = OrderedCompositor(sources=[a, b], interpolation="bilinear") + result = node.create_output_array(coords, data=np.random.random(coords.shape)) + output = node.eval(coords, output=result) + np.testing.assert_array_equal(output, a.source) + np.testing.assert_array_equal(result, a.source) + + def test_composite_multiple_outputs(self): + node = OrderedCompositor(sources=[MULTI_0_XY, MULTI_1_XY], auto_outputs=True) + output = node.eval(COORDS) + assert output.dims == ("lat", "lon", "time", "output") + np.testing.assert_array_equal(output["output"], ["x", "y"]) + np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 0)) + np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) + + node = OrderedCompositor(sources=[MULTI_1_XY, MULTI_0_XY], auto_outputs=True) + output = node.eval(COORDS) + assert output.dims == ("lat", "lon", "time", "output") + np.testing.assert_array_equal(output["output"], ["x", "y"]) + np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 1)) + np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 1)) + + def test_composite_combine_multiple_outputs(self): + node = OrderedCompositor(sources=[MULTI_0_XY, MULTI_1_XY, MULTI_2_X, MULTI_3_Z], auto_outputs=True) + output = node.eval(COORDS) + assert output.dims == ("lat", "lon", "time", "output") + np.testing.assert_array_equal(output["output"], ["x", "y", "z"]) + np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 0)) + np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) + np.testing.assert_array_equal(output.sel(output="z"), np.full(COORDS.shape, 3)) + + node = OrderedCompositor(sources=[MULTI_3_Z, MULTI_2_X, MULTI_0_XY, MULTI_1_XY], auto_outputs=True) + output = node.eval(COORDS) + assert output.dims == ("lat", "lon", "time", "output") + np.testing.assert_array_equal(output["output"], ["z", "x", "y"]) + np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 2)) + np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) + 
np.testing.assert_array_equal(output.sel(output="z"), np.full(COORDS.shape, 3)) + + node = OrderedCompositor(sources=[MULTI_2_X, MULTI_4_YX], auto_outputs=True) + output = node.eval(COORDS) + assert output.dims == ("lat", "lon", "time", "output") + np.testing.assert_array_equal(output["output"], ["x", "y"]) + np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 2)) + np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 4)) + + def test_composite_stacked_unstacked(self): + anative = podpac.Coordinates([podpac.clinspace((0, 1), (1, 2), size=3)], dims=["lat_lon"]) + bnative = podpac.Coordinates([podpac.clinspace(-2, 3, 3), podpac.clinspace(-1, 4, 3)], dims=["lat", "lon"]) + a = Array(source=np.random.rand(3), coordinates=anative) + b = Array(source=np.random.rand(3, 3) + 2, coordinates=bnative) + + coords = podpac.Coordinates([podpac.clinspace(-3, 4, 32), podpac.clinspace(-2, 5, 32)], dims=["lat", "lon"]) + + node = OrderedCompositor(sources=[a, b], interpolation="nearest") + o = node.eval(coords) + # Check that both data sources are being used in the interpolation + assert np.any(o.data >= 2) + assert np.any(o.data <= 1) diff --git a/podpac/core/compositor/test/test_tile_compositor.py b/podpac/core/compositor/test/test_tile_compositor.py new file mode 100644 index 000000000..417b90d3f --- /dev/null +++ b/podpac/core/compositor/test/test_tile_compositor.py @@ -0,0 +1,160 @@ +import itertools + +import pytest +import numpy as np +import traitlets as tl + +import podpac +from podpac.utils import cached_property +from podpac.data import DataSource +from podpac.core.compositor.tile_compositor import TileCompositor, UniformTileCompositor, UniformTileMixin + + +class MockTile(UniformTileMixin, podpac.data.DataSource): + x = tl.Int() # used as a modifier to distinguish between tiles in the tests + data = np.arange(16).reshape(1, 4, 4) + + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates, data=self.data[coordinates_index] + self.x) + + +class MockTileCompositor(UniformTileCompositor): + shape = (3, 3, 3) + + @cached_property + def sources(self): + return [ + MockTile(tile=(i, j, k), grid=self, x=20 * n) + for n, (i, j, k) in enumerate(itertools.product(range(3), range(3), range(3))) + ] + + def get_coordinates(self): + return podpac.Coordinates( + [["2018-01-01", "2018-01-03", "2018-01-05"], podpac.clinspace(0, 11, 12), podpac.clinspace(0, 11, 12)], + dims=["time", "lat", "lon"], + ) + + +class TestUniformTileMixin(object): + def test_tile_coordinates_index(self): + class MyTile(UniformTileMixin, DataSource): + pass + + grid = MockTileCompositor() + tile = MyTile(grid=grid, tile=(1, 1, 0)) + + assert tile.width == grid.tile_width + assert tile.coordinates == podpac.Coordinates( + ["2018-01-03", podpac.clinspace(4, 7, 4), podpac.clinspace(0, 3, 4)], dims=["time", "lat", "lon"] + ) + + def test_repr(self): + class MyTile(UniformTileMixin, DataSource): + pass + + grid = MockTileCompositor() + tile = MyTile(grid=grid, tile=(1, 1, 0)) + assert "MyTile" in repr(tile) + assert "tile=(1, 1, 0)" in repr(tile) + + +class TestTileCompositor(object): + def test_sources(self): + node = TileCompositor() + with pytest.raises(NotImplementedError): + node.sources + + node = MockTileCompositor() + assert len(node.sources) == 27 + assert all(isinstance(tile, MockTile) for tile in node.sources) + + def test_coordinates(self): + node = TileCompositor() + with pytest.raises(NotImplementedError): + node.coordinates + + node = 
MockTileCompositor() + assert node.coordinates == podpac.Coordinates( + [["2018-01-01", "2018-01-03", "2018-01-05"], podpac.clinspace(0, 11, 12), podpac.clinspace(0, 11, 12)], + dims=["time", "lat", "lon"], + ) + + +class TestUniformTileCompositor(object): + def test_tile_width(self): + node = MockTileCompositor() + assert node.tile_width == (1, 4, 4) + + def test_get_data_coordinates(self): + node = MockTileCompositor() + + # all coordinates + output = node.eval(node.coordinates) + assert np.all(np.isfinite(output)) + np.testing.assert_array_equal(output[0, :4, :4], np.arange(16).reshape(4, 4) + 0) + np.testing.assert_array_equal(output[0, :4, 4:8], np.arange(16).reshape(4, 4) + 20) + np.testing.assert_array_equal(output[0, 4:8, :4], np.arange(16).reshape(4, 4) + 60) + np.testing.assert_array_equal(output[1, :4, :4], np.arange(16).reshape(4, 4) + 180) + + # single point + output = node.eval(node.coordinates[2, 2, 2]) + np.testing.assert_array_equal(output, [[[370]]]) + + # partial tiles + output = node.eval(node.coordinates[1, 2:6, 2:4]) + np.testing.assert_array_equal(output, [[[190, 191], [194, 195], [242, 243], [246, 247]]]) + + def test_get_data_spatial_interpolation(self): + # exact times, interpolated lat/lon + c1 = podpac.Coordinates(["2018-01-01", [0.25, 0.75, 1.25], [0.25, 0.75, 1.25]], dims=["time", "lat", "lon"]) + c2 = podpac.Coordinates(["2018-01-03", [0.25, 0.75, 1.25], [0.25, 0.75, 1.25]], dims=["time", "lat", "lon"]) + + node = MockTileCompositor(interpolation="nearest") + np.testing.assert_array_equal(node.eval(c1), [[[0, 1, 1], [4, 5, 5], [4, 5, 5]]]) + np.testing.assert_array_equal(node.eval(c2), [[[180, 181, 181], [184, 185, 185], [184, 185, 185]]]) + + node = MockTileCompositor(interpolation="bilinear") + np.testing.assert_array_equal(node.eval(c1), [[[1.25, 1.75, 2.25], [3.25, 3.75, 4.25], [5.25, 5.75, 6.25]]]) + np.testing.assert_array_equal( + node.eval(c2), [[[181.25, 181.75, 182.25], [183.25, 183.75, 184.25], [185.25, 185.75, 186.25]]] + ) + + def test_get_data_time_interpolation(self): + # exact lat/lon, interpolated times + c1 = podpac.Coordinates(["2018-01-01T01:00:00", [1, 2], [1, 2]], dims=["time", "lat", "lon"]) + c2 = podpac.Coordinates(["2018-01-02T23:00:00", [1, 2], [1, 2]], dims=["time", "lat", "lon"]) + c3 = podpac.Coordinates(["2018-01-03T01:00:00", [1, 2], [1, 2]], dims=["time", "lat", "lon"]) + + node = MockTileCompositor(interpolation="nearest") + np.testing.assert_array_equal(node.eval(c1), [[[5, 6], [9, 10]]]) + np.testing.assert_array_equal(node.eval(c2), [[[185, 186], [189, 190]]]) + np.testing.assert_array_equal(node.eval(c3), [[[185, 186], [189, 190]]]) + + # TODO + # node = MockTileCompositor(interpolation='bilinear') + # np.testing.assert_array_equal(node.eval(c1), TODO) + # np.testing.assert_array_equal(node.eval(c2), TODO) + # np.testing.assert_array_equal(node.eval(c3), TODO) + + def test_get_data_interpolation(self): + # interpolated lat/lon and time + c1 = podpac.Coordinates( + ["2018-01-01T01:00:00", [0.25, 0.75, 1.25], [0.25, 0.75, 1.25]], dims=["time", "lat", "lon"] + ) + c2 = podpac.Coordinates( + ["2018-01-02T23:00:00", [0.25, 0.75, 1.25], [0.25, 0.75, 1.25]], dims=["time", "lat", "lon"] + ) + c3 = podpac.Coordinates( + ["2018-01-03T01:00:00", [0.25, 0.75, 1.25], [0.25, 0.75, 1.25]], dims=["time", "lat", "lon"] + ) + + node = MockTileCompositor(interpolation="nearest") + np.testing.assert_array_equal(node.eval(c1), [[[0, 1, 1], [4, 5, 5], [4, 5, 5]]]) + np.testing.assert_array_equal(node.eval(c2), [[[180, 181, 181], [184, 185, 
185], [184, 185, 185]]]) + np.testing.assert_array_equal(node.eval(c3), [[[180, 181, 181], [184, 185, 185], [184, 185, 185]]]) + + # TODO + # node = MockTileCompositor(interpolation='bilinear') + # np.testing.assert_array_equal(node.eval(c1), TODO) + # np.testing.assert_array_equal(node.eval(c2), TODO) + # np.testing.assert_array_equal(node.eval(c3), TODO) diff --git a/podpac/core/compositor/tile_compositor.py b/podpac/core/compositor/tile_compositor.py new file mode 100644 index 000000000..ef7898945 --- /dev/null +++ b/podpac/core/compositor/tile_compositor.py @@ -0,0 +1,131 @@ +from __future__ import division, unicode_literals, print_function, absolute_import + +import numpy as np +import traitlets as tl + +import podpac +from podpac.core.coordinates import Coordinates +from podpac.core.units import UnitsDataArray +from podpac.core.utils import common_doc, cached_property, ind2slice +from podpac.core.data.datasource import DataSource, COMMON_DATA_DOC + + +@common_doc(COMMON_DATA_DOC) +class TileCompositor(DataSource): + """Composite tiled datasources. + + Attributes + ---------- + sources : list + The tiled data sources. + coordinates : :class:`podpac.Coordinates` + Coordinates encompassing all of the tiled sources. + + Notes + ----- + This compositor aggregates source data first and then interpolates the requested coordinates. + """ + + @property + def sources(self): + """ Tiled data sources (using the TileMixin). + + Child classes should define these sources including a reference to itself and the tile_coordinates_index. + """ + + raise NotImplementedError() + + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + + output = self.create_output_array(coordinates) + for source in self.sources: + c, I = source.coordinates.intersect(coordinates, return_indices=True) + if c.size == 0: + continue + source_data = source.get_data(c, I) + output.loc[source_data.coords] = source_data + + return output + + +@common_doc(COMMON_DATA_DOC) +class UniformTileCompositor(TileCompositor): + """Composite a grid of uniformly tiled datasources. + + Attributes + ---------- + sources : list + The tiled data sources. + coordinates : :class:`podpac.Coordinates` + Coordinates encompassing all of the tiled sources. + shape : tuple + shape of the tile grid + tile_width : tuple + shape of the coordinates for each tile + + Notes + ----- + This compositor aggregates source data first and then interpolates the requested coordinates. + """ + + shape = tl.Tuple() + _repr_keys = ["shape"] + + @property + def sources(self): + """ Tiled data sources (using the UniformTileMixin). + + Child classes should define these sources including a reference to itself and the tile index in the grid. + """ + + raise NotImplementedError() + + @cached_property + def tile_width(self): + """Tuple of the number of coordinates that the tile covers in each dimension.""" + return tuple(int(n / m) for n, m in zip(self.coordinates.shape, self.shape)) + + +@common_doc(COMMON_DATA_DOC) +class UniformTileMixin(tl.HasTraits): + """DataSource mixin for uniform tiles in a grid. + + Defines the tile coordinates from the grid coordinates using the tile position in the grid. 
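As a small, self-contained sketch of that mapping (a plain-python stand-in mirroring the slicing used by get_coordinates below; the grid and tile values are illustrative):

def tile_slices(tile, tile_width):
    # each tile covers tile_width[d] consecutive grid coordinates along dimension d
    return tuple(slice(w * i, w * (i + 1)) for i, w in zip(tile, tile_width))

# e.g. with tile_width (1, 4, 4), tile (1, 1, 0) covers grid indices [1:2, 4:8, 0:4]
tile_slices((1, 1, 0), (1, 4, 4))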
+ + Attributes + ---------- + grid : TileCompositor + tiling compositor containing the grid coordinates, grid shape, and tile sources + tile : tuple + index for this tile in the grid + width : tuple + number of grid coordinates covered by this tile in each dimension + """ + + grid = tl.Instance(TileCompositor) + tile = tl.Tuple() + + @tl.validate("tile") + def _validate_tile(self, d): + tile = d["value"] + if len(tile) != len(self.grid.shape): + raise ValueError("tile index does not match grid shape (%d != %d)" % (len(tile), len(self.grid.shape))) + if not all(0 <= i < n for (i, n) in zip(tile, self.grid.shape)): + raise ValueError("tile index %s out of range for grid shape %s" % (tile, self.grid.shape)) + return tile + + @property + def width(self): + return self.grid.tile_width + + def get_coordinates(self): + """{get_coordinates} + """ + Is = tuple(slice(w * i, w * (i + 1)) for i, w in zip(self.tile, self.width)) + return self.grid.coordinates[Is] + + @property + def _repr_keys(self): + return super(UniformTileMixin, self)._repr_keys + ["tile"] diff --git a/podpac/core/coordinates/__init__.py b/podpac/core/coordinates/__init__.py index 883c1f102..1212fd8e9 100644 --- a/podpac/core/coordinates/__init__.py +++ b/podpac/core/coordinates/__init__.py @@ -3,6 +3,7 @@ from podpac.core.coordinates.utils import make_coord_array from podpac.core.coordinates.utils import make_coord_delta_array from podpac.core.coordinates.utils import add_coord +from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES from podpac.core.coordinates.base_coordinates import BaseCoordinates from podpac.core.coordinates.coordinates1d import Coordinates1d diff --git a/podpac/core/coordinates/array_coordinates1d.py b/podpac/core/coordinates/array_coordinates1d.py index 446841cca..5426bdb07 100644 --- a/podpac/core/coordinates/array_coordinates1d.py +++ b/podpac/core/coordinates/array_coordinates1d.py @@ -32,20 +32,24 @@ class ArrayCoordinates1d(Coordinates1d): Dimension name, one of 'lat', 'lon', 'time', or 'alt'. coordinates : array, read-only Full array of coordinate values. - ctype : str - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths : array, float, timedelta - When ctype is a segment type, the segment lengths for the coordinates. - + See Also -------- :class:`Coordinates1d`, :class:`UniformCoordinates1d` """ coordinates = ArrayTrait(ndim=1, read_only=True) - coordinates.__doc__ = ":array: User-defined coordinate values" + # coordinates.__doc__ = ":array: User-defined coordinate values" + # coordinates = None + + _is_monotonic = None + _is_descending = None + _is_uniform = None + _step = None + _start = None + _stop = None - def __init__(self, coordinates, name=None, ctype=None, segment_lengths=None): + def __init__(self, coordinates, name=None): """ Create 1d coordinates from an array. @@ -55,90 +59,39 @@ def __init__(self, coordinates, name=None, ctype=None, segment_lengths=None): coordinate values. name : str, optional Dimension name, one of 'lat', 'lon', 'time', or 'alt'. - ctype : str, optional - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths : array, optional - When ctype is a segment type, the segment lengths for the coordinates. The segment_lengths are required - for nonmonotonic coordinates. The segment can be inferred from coordinate values for monotonic coordinates.
""" # validate and set coordinates - self.set_trait("coordinates", make_coord_array(coordinates)) + coordinates = make_coord_array(coordinates) + self.set_trait("coordinates", coordinates) + self.not_a_trait = coordinates # precalculate once if self.coordinates.size == 0: - self._is_monotonic = None - self._is_descending = None - self._is_uniform = None + pass elif self.coordinates.size == 1: self._is_monotonic = True - self._is_descending = None - self._is_uniform = True else: - deltas = (self.coordinates[1:] - self.coordinates[:-1]).astype(float) * ( + deltas = (self.coordinates[1:] - self.coordinates[:-1]).astype(float) * np.sign( self.coordinates[1] - self.coordinates[0] ).astype(float) if np.any(deltas <= 0): self._is_monotonic = False - self._is_descending = None + self._is_descending = False self._is_uniform = False else: self._is_monotonic = True self._is_descending = self.coordinates[1] < self.coordinates[0] self._is_uniform = np.allclose(deltas, deltas[0]) + if self._is_uniform: + self._start = self.coordinates[0] + self._stop = self.coordinates[-1] + self._step = (self._stop - self._start) / (self.coordinates.size - 1) # set common properties - super(ArrayCoordinates1d, self).__init__(name=name, ctype=ctype, segment_lengths=segment_lengths) - - # check segment lengths - if segment_lengths is None: - if self.ctype == "point" or self.size == 0: - self.set_trait("segment_lengths", None) - elif self.dtype == np.datetime64: - raise TypeError("segment_lengths required for datetime coordinates (if ctype != 'point')") - elif self.size == 1: - raise TypeError("segment_lengths required for coordinates of size 1 (if ctype != 'point')") - elif not self.is_monotonic: - raise TypeError("segment_lengths required for nonmonotonic coordinates (if ctype != 'point')") - - @tl.default("ctype") - def _default_ctype(self): - if self.size == 0 or self.size == 1 or not self.is_monotonic or self.dtype == np.datetime64: - return "point" - else: - return "midpoint" - - @tl.default("segment_lengths") - def _default_segment_lengths(self): - if self.ctype == "point": - return None - - if self.is_uniform: - return np.abs(self.coordinates[1] - self.coordinates[0]) - - deltas = np.abs(self.coordinates[1:] - self.coordinates[:-1]) - if self.is_descending: - deltas = deltas[::-1] - - segment_lengths = np.zeros(self.coordinates.size) - if self.ctype == "left": - segment_lengths[:-1] = deltas - segment_lengths[-1] = segment_lengths[-2] - elif self.ctype == "right": - segment_lengths[1:] = deltas - segment_lengths[0] = segment_lengths[1] - elif self.ctype == "midpoint": - segment_lengths[:-1] = deltas - segment_lengths[1:] += deltas - segment_lengths[1:-1] /= 2 - - if self.is_descending: - segment_lengths = segment_lengths[::-1] - - segment_lengths.setflags(write=False) - return segment_lengths + super(ArrayCoordinates1d, self).__init__(name=name) def __eq__(self, other): if not super(ArrayCoordinates1d, self).__eq__(other): @@ -162,11 +115,6 @@ def from_xarray(cls, x, **kwargs): --------- x : xarray.DataArray Nade DataArray of the coordinate values - ctype : str, optional - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths : (low, high), optional - When ctype is a segment type, the segment lengths for the coordinates. The segment_lengths are required - for nonmonotonic coordinates. The segment can be inferred from coordinate values for monotonic coordinates. 
Returns ------- @@ -191,8 +139,7 @@ def from_definition(cls, d): c = ArrayCoordinates1d.from_definition({ "values": [0, 1, 2, 3], - "name": "lat", - "ctype": "points" + "name": "lat" }) Arguments @@ -227,8 +174,23 @@ def copy(self): Copy of the coordinates. """ - kwargs = self.properties - return ArrayCoordinates1d(self.coordinates, **kwargs) + return ArrayCoordinates1d(self.coordinates, **self.properties) + + def simplify(self): + """ Get the simplified/optimized representation of these coordinates. + + Returns + ------- + simplified : ArrayCoordinates1d, UniformCoordinates1d + UniformCoordinates1d if the coordinates are uniform, otherwise ArrayCoordinates1d + """ + + from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d + + if self.is_uniform: + return UniformCoordinates1d(self.start, self.stop, self.step, **self.properties) + + return self # ------------------------------------------------------------------------------------------------------------------ # standard methods, array-like @@ -238,17 +200,7 @@ def __len__(self): return self.size def __getitem__(self, index): - coordinates = self.coordinates[index] - kwargs = self.properties - kwargs["ctype"] = self.ctype - - if self.ctype != "point": - if isinstance(self.segment_lengths, np.ndarray): - kwargs["segment_lengths"] = self.segment_lengths[index] - else: - kwargs["segment_lengths"] = self.segment_lengths - - return ArrayCoordinates1d(coordinates, **kwargs) + return ArrayCoordinates1d(self.coordinates[index], **self.properties) # ------------------------------------------------------------------------------------------------------------------ # Properties @@ -285,12 +237,22 @@ def is_descending(self): def is_uniform(self): return self._is_uniform + @property + def start(self): + return self._start + + @property + def stop(self): + return self._stop + + @property + def step(self): + return self._step + @property def bounds(self): """ Low and high coordinate bounds. """ - # TODO are we sure this can't be a tuple? - if self.size == 0: lo, hi = np.nan, np.nan elif self.is_monotonic: @@ -300,10 +262,7 @@ def bounds(self): else: lo, hi = np.nanmin(self.coordinates), np.nanmax(self.coordinates) - # read-only array with the correct dtype - bounds = np.array([lo, hi], dtype=self.dtype) - bounds.setflags(write=False) - return bounds + return lo, hi @property def argbounds(self): diff --git a/podpac/core/coordinates/base_coordinates.py b/podpac/core/coordinates/base_coordinates.py index d2cfcc1db..2f1b624e5 100644 --- a/podpac/core/coordinates/base_coordinates.py +++ b/podpac/core/coordinates/base_coordinates.py @@ -10,9 +10,6 @@ class BaseCoordinates(tl.HasTraits): def _set_name(self, value): raise NotImplementedError - def _set_ctype(self, value): - raise NotImplementedError - @property def name(self): """:str: Dimension name.""" @@ -72,6 +69,10 @@ def copy(self): """Deep copy of the coordinates and their properties.""" raise NotImplementedError + def get_area_bounds(self, boundary): + """Get coordinate area bounds, including boundary information, for each unstacked dimension. 
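A brief usage sketch of the new simplify() and the tuple-valued bounds (import paths follow the ones used in this diff; the values are illustrative):

from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d
from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d

c = ArrayCoordinates1d([0.0, 1.0, 2.0, 3.0], name="lat")
c.bounds                                        # (0.0, 3.0): now a plain (low, high) tuple
isinstance(c.simplify(), UniformCoordinates1d)  # True: start=0.0, stop=3.0, step=1.0

c = ArrayCoordinates1d([0.0, 1.0, 2.5], name="lat")
c.simplify() is c                               # True: non-uniform coordinates are returned as-is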
""" + raise NotImplementedError + def select(self, bounds, outer=False, return_indices=False): """Get coordinate values that are with the given bounds.""" raise NotImplementedError diff --git a/podpac/core/coordinates/cfunctions.py b/podpac/core/coordinates/cfunctions.py index d8f4f93ae..4090de63c 100644 --- a/podpac/core/coordinates/cfunctions.py +++ b/podpac/core/coordinates/cfunctions.py @@ -6,7 +6,7 @@ from podpac.core.coordinates.stacked_coordinates import StackedCoordinates -def crange(start, stop, step, name=None, ctype=None): +def crange(start, stop, step, name=None): """ Create uniformly-spaced 1d coordinates with a start, stop, and step. @@ -25,8 +25,6 @@ def crange(start, stop, step, name=None, ctype=None): Signed, non-zero step between coordinates. name : str, optional Dimension name. - ctype : str, optional - Coordinate type (point, midpoint, left, or right) Returns ------- @@ -34,10 +32,10 @@ def crange(start, stop, step, name=None, ctype=None): Uniformly-spaced 1d coordinates. """ - return UniformCoordinates1d(start, stop, step=step, name=name, ctype=ctype) + return UniformCoordinates1d(start, stop, step=step, name=name) -def clinspace(start, stop, size, name=None, ctype=None): +def clinspace(start, stop, size, name=None): """ Create uniformly-spaced 1d or stacked coordinates with a start, stop, and size. @@ -55,9 +53,7 @@ def clinspace(start, stop, size, name=None, ctype=None): Number of coordinates. name : str, optional Dimension name. - ctype : str, optional - Coordinate type (point, midpoint, left, or right) - + Returns ------- :class:`UniformCoordinates1d` @@ -81,6 +77,6 @@ def clinspace(start, stop, size, name=None, ctype=None): cs = [UniformCoordinates1d(start[i], stop[i], size=size) for i in range(a[0].size)] c = StackedCoordinates(cs, name=name) else: - c = UniformCoordinates1d(start, stop, size=size, name=name, ctype=ctype) + c = UniformCoordinates1d(start, stop, size=size, name=name) return c diff --git a/podpac/core/coordinates/coordinates.py b/podpac/core/coordinates/coordinates.py index b752eb549..62c1e665e 100644 --- a/podpac/core/coordinates/coordinates.py +++ b/podpac/core/coordinates/coordinates.py @@ -27,11 +27,13 @@ from podpac.core.utils import OrderedDictTrait, _get_query_params_from_url, _get_param from podpac.core.coordinates.base_coordinates import BaseCoordinates from podpac.core.coordinates.coordinates1d import Coordinates1d +from podpac.core.coordinates.dependent_coordinates import ArrayCoordinatesNd from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d from podpac.core.coordinates.stacked_coordinates import StackedCoordinates from podpac.core.coordinates.dependent_coordinates import DependentCoordinates from podpac.core.coordinates.rotated_coordinates import RotatedCoordinates +from podpac.core.coordinates.cfunctions import clinspace # Optional dependencies from lazy_import import lazy_module, lazy_class @@ -77,7 +79,7 @@ class Coordinates(tl.HasTraits): _coords = OrderedDictTrait(trait=tl.Instance(BaseCoordinates), default_value=OrderedDict()) - def __init__(self, coords, dims=None, crs=None, ctype=None): + def __init__(self, coords, dims=None, crs=None, validate_crs=True): """ Create multidimensional coordinates. 
@@ -96,9 +98,9 @@ def __init__(self, coords, dims=None, crs=None, ctype=None): * 'lat', 'lon', 'alt', or 'time' for unstacked coordinates * dimension names joined by an underscore for stacked coordinates crs : str, optional - Coordinate reference system. Supports any PROJ4 or PROJ6 compliant string (https://proj.org). - ctype : str, optional - Default coordinates type. One of 'point', 'midpoint', 'left', 'right'. + Coordinate reference system. Supports PROJ4 and WKT. + validate_crs : bool, optional + Use False to skip crs validation. Default True. """ if not isinstance(coords, (list, tuple, np.ndarray, xr.DataArray)): @@ -139,17 +141,26 @@ def __init__(self, coords, dims=None, crs=None, ctype=None): else: c = ArrayCoordinates1d(coords[i]) - # propagate properties and name + # propagate name c._set_name(dim) - if ctype is not None: - c._set_ctype(ctype) # set coords dcoords[dim] = c self.set_trait("_coords", dcoords) + if crs is not None: - self.set_trait("crs", crs) + # validate + if validate_crs: + # raises pyproj.CRSError if invalid + CRS = pyproj.CRS(crs) + + # make sure CRS defines vertical units + if "alt" in self.udims and not CRS.is_vertical: + raise ValueError("Altitude dimension is defined, but CRS does not contain vertical unit") + + crs = self.set_trait("crs", crs) + super(Coordinates, self).__init__() @tl.validate("_coords") @@ -174,21 +185,6 @@ def _validate_coords(self, d): def _default_crs(self): return settings["DEFAULT_CRS"] - @tl.validate("crs") - def _validate_crs(self, d): - val = d["value"] - CRS = pyproj.CRS(val) # raises pyproj.CRSError if invalid - - # make sure CRS defines vertical units - if "alt" in self.udims and not CRS.is_vertical: - raise ValueError("Altitude dimension is defined, but CRS does not contain vertical unit") - - return val - - @tl.observe("crs") - def _observe_crs(self, d): - crs = d["new"] - # ------------------------------------------------------------------------------------------------------------------ # Alternate constructors # ------------------------------------------------------------------------------------------------------------------ @@ -218,7 +214,7 @@ def _coords_from_dict(d, order=None): return coords @classmethod - def grid(cls, dims=None, crs=None, ctype=None, **kwargs): + def grid(cls, dims=None, crs=None, **kwargs): """ Create a grid of coordinates. @@ -249,8 +245,6 @@ def grid(cls, dims=None, crs=None, ctype=None, **kwargs): argument is optional, and the dims will match the order of the provided keyword arguments. crs : str, optional Coordinate reference system. Supports any PROJ4 or PROJ6 compliant string (https://proj.org). - ctype : str, optional - Default coordinates type. One of 'point', 'midpoint', 'left', 'right'. Returns ------- @@ -263,10 +257,10 @@ def grid(cls, dims=None, crs=None, ctype=None, **kwargs): """ coords = cls._coords_from_dict(kwargs, order=dims) - return cls(coords, crs=crs, ctype=ctype) + return cls(coords, crs=crs) @classmethod - def points(cls, crs=None, ctype=None, dims=None, **kwargs): + def points(cls, crs=None, dims=None, **kwargs): """ Create a list of multidimensional coordinates. @@ -300,8 +294,6 @@ def points(cls, crs=None, ctype=None, dims=None, **kwargs): argument is optional, and the dims will match the order of the provided keyword arguments. crs : str, optional Coordinate reference system. Supports any PROJ4 or PROJ6 compliant string (https://proj.org/). - ctype : str, optional - Default coordinates type. One of 'point', 'midpoint', 'left', 'right'. 
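A short usage sketch reflecting the updated constructors (ctype is gone; crs and the new validate_crs flag remain; values are illustrative):

import podpac

# grid coordinates from keyword arguments, no ctype
c = podpac.Coordinates.grid(lat=[0.0, 0.5, 1.0], lon=[10.0, 20.0], crs="EPSG:4326")

# validate_crs=False skips the pyproj.CRS validation, e.g. for coordinates podpac builds internally
c2 = podpac.Coordinates([[0.0, 0.5, 1.0], [10.0, 20.0]], dims=["lat", "lon"], validate_crs=False)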
Returns ------- @@ -315,10 +307,10 @@ def points(cls, crs=None, ctype=None, dims=None, **kwargs): coords = cls._coords_from_dict(kwargs, order=dims) stacked = StackedCoordinates(coords) - return cls([stacked], crs=crs, ctype=ctype) + return cls([stacked], crs=crs) @classmethod - def from_xarray(cls, xcoord, crs=None, ctype=None): + def from_xarray(cls, xcoord, crs=None): """ Create podpac Coordinates from xarray coords. @@ -328,8 +320,6 @@ def from_xarray(cls, xcoord, crs=None, ctype=None): xarray coords crs : str, optional Coordinate reference system. Supports any PROJ4 or PROJ6 compliant string (https://proj.org/). - ctype : str, optional - Default coordinates type. One of 'point', 'midpoint', 'left', 'right'. Returns ------- @@ -352,7 +342,7 @@ def from_xarray(cls, xcoord, crs=None, ctype=None): raise NotImplementedError coords.append(c) - return cls(coords, crs=crs, ctype=ctype) + return cls(coords, crs=crs) @classmethod def from_json(cls, s): @@ -376,7 +366,6 @@ def from_json(cls, s): }, { "name": "time", - "ctype": "left" "values": [ "2018-01-01", "2018-01-03", @@ -601,7 +590,9 @@ def __getitem__(self, index): indices.append(index[i]) i += 1 - return Coordinates([c[I] for c, I in zip(self._coords.values(), indices)], **self.properties) + return Coordinates( + [c[I] for c, I in zip(self._coords.values(), indices)], validate_crs=False, **self.properties + ) def __setitem__(self, dim, c): @@ -660,6 +651,7 @@ def __eq__(self, other): return False # properties + # TODO check transform instead if self.CRS != other.CRS: return False @@ -766,11 +758,6 @@ def bounds(self): """:dict: Dictionary of (low, high) coordinates bounds in each unstacked dimension""" return {dim: self[dim].bounds for dim in self.udims} - @property - def area_bounds(self): - """:dict: Dictionary of (low, high) coordinates area_bounds in each unstacked dimension""" - return {dim: self[dim].area_bounds for dim in self.udims} - @property def coords(self): """ @@ -784,10 +771,9 @@ def coords(self): @property def CRS(self): - crs = self.crs - return pyproj.CRS(crs) + return pyproj.CRS(self.crs) - # TODO: add a convience property for displaying altitude units for the CRS + # TODO: add a convenience property for displaying altitude units for the CRS # @property # def alt_units(self): # CRS = self.CRS @@ -822,11 +808,8 @@ def full_definition(self): d = OrderedDict() d["coords"] = [c.full_definition for c in self._coords.values()] - d[ - "crs" - ] = ( - self.CRS.to_wkt() - ) # use "wkt" which is suggested as best format: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems + # "wkt" is suggested as best format: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems + d["crs"] = self.CRS.to_wkt() return d @property @@ -876,9 +859,9 @@ def geotransform(self): # Do the uniform coordinates case if ( "lat" in self.dims - and isinstance(self._coords["lat"], UniformCoordinates1d) and "lon" in self.dims - and isinstance(self._coords["lon"], UniformCoordinates1d) + and self._coords["lat"].is_uniform + and self._coords["lon"].is_uniform ): if self.dims.index("lon") < self.dims.index("lat"): first, second = "lat", "lon" @@ -908,6 +891,22 @@ def geotransform(self): # Methods # ------------------------------------------------------------------------------------------------------------------ + def get_area_bounds(self, boundary): + """Get coordinate area bounds, including segment information, for each unstacked dimension. 
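For the dict-based boundary argument documented just below, a minimal usage sketch (values are illustrative, and the expected results are approximate):

import podpac

c = podpac.Coordinates([[0.0, 1.0, 2.0], [10.0, 20.0, 30.0]], dims=["lat", "lon"])
# 0.5-degree half-width segments in lat; lon is omitted, so it is treated as point coordinates
c.get_area_bounds({"lat": 0.5})   # expected: roughly {"lat": (-0.5, 2.5), "lon": (10.0, 30.0)}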
+ + Arguments + --------- + boundary : dict + dictionary of boundary offsets for each unstacked dimension. Non-segment dimensions can be omitted. + + Returns + ------- + area_bounds : dict + Dictionary of (low, high) coordinates area_bounds in each unstacked dimension + """ + + return {dim: self[dim].get_area_bounds(boundary.get(dim)) for dim in self.udims} + def drop(self, dims, ignore_missing=False): """ Remove the given dimensions from the Coordinates `dims`. @@ -943,7 +942,9 @@ def drop(self, dims, ignore_missing=False): if dim not in self.dims and not ignore_missing: raise KeyError("Dimension '%s' not found in Coordinates with dims %s" % (dim, self.dims)) - return Coordinates([c for c in self._coords.values() if c.name not in dims], **self.properties) + return Coordinates( + [c for c in self._coords.values() if c.name not in dims], validate_crs=False, **self.properties + ) # do we ever need this? def udrop(self, dims, ignore_missing=False): @@ -957,15 +958,15 @@ def udrop(self, dims, ignore_missing=False): In [2]: c Out[2]: Coordinates - lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 1.0], N[2], ctype['midpoint'] - lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 20.0], N[2], ctype['midpoint'] - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-01], N[1], ctype['midpoint'] + lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 1.0], N[2] + lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 20.0], N[2] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-01], N[1] In [3]: c.udrop('lat') Out[3]: Coordinates - lon: ArrayCoordinates1d(lon): Bounds[10.0, 20.0], N[2], ctype['midpoint'] - time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-01], N[1], ctype['midpoint'] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 20.0], N[2] + time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-01], N[1] Parameters ---------- @@ -1010,7 +1011,7 @@ def udrop(self, dims, ignore_missing=False): elif len(stacked) == 1: cs.append(stacked[0]) - return Coordinates(cs, **self.properties) + return Coordinates(cs, validate_crs=False, **self.properties) def intersect(self, other, dims=None, outer=False, return_indices=False): """ @@ -1093,22 +1094,22 @@ def select(self, bounds, return_indices=False, outer=False): In [2]: c.select({'lat': [1.5, 3.5]}) Out[2]: Coordinates - lat: ArrayCoordinates1d(lat): Bounds[2.0, 3.0], N[2], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[2.0, 3.0], N[2] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4] In [3]: c.select({'lat': [1.5, 3.5], 'lon': [25, 45]}) Out[3]: Coordinates - lat: ArrayCoordinates1d(lat): Bounds[2.0, 3.0], N[2], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[30.0, 40.0], N[2], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[2.0, 3.0], N[2] + lon: ArrayCoordinates1d(lon): Bounds[30.0, 40.0], N[2] The *outer* selection returns the minimal set of coordinates that contain the bounds:: In [4]: c.select({'lat':[1.5, 3.5]}, outer=True) Out[4]: Coordinates - lat: ArrayCoordinates1d(lat): Bounds[1.0, 3.0], N[3], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[1.0, 3.0], N[3] + lon: ArrayCoordinates1d(lon): Bounds[10.0, 40.0], N[4] Parameters ---------- @@ -1132,13 +1133,13 @@ def select(self, bounds, return_indices=False, outer=False): def _make_selected_coordinates(self, selections, return_indices): if return_indices: - coords = Coordinates([c 
for c, I in selections], **self.properties) + coords = Coordinates([c for c, I in selections], validate_crs=False, **self.properties) # unbundle DepedentCoordinates indices I = [I if isinstance(c, DependentCoordinates) else [I] for c, I in selections] I = [e for l in I for e in l] return coords, tuple(I) else: - return Coordinates(selections, **self.properties) + return Coordinates(selections, validate_crs=False, **self.properties) def unique(self, return_indices=False): """ @@ -1150,7 +1151,7 @@ def unique(self, return_indices=False): If True, return indices for the unique coordinates in addition to the coordinates. Default False. Returns ------- - coords : Coordinates + coords : :class:`podpac.Coordinates` New Coordinates object with unique, sorted coordinate values in each dimension. I : list of indices index for the unique coordinates in each dimension (only if return_indices=True) @@ -1169,7 +1170,7 @@ def unstack(self): Returns ------- - unstacked : :class:`Coordinates` + unstacked : :class:`podpac.Coordinates` A new Coordinates object with unstacked coordinates. See Also @@ -1177,7 +1178,7 @@ def unstack(self): xr.DataArray.unstack """ - return Coordinates([self[dim] for dim in self.udims], **self.properties) + return Coordinates([self[dim] for dim in self.udims], validate_crs=False, **self.properties) def iterchunks(self, shape, return_slices=False): """ @@ -1200,7 +1201,9 @@ def iterchunks(self, shape, return_slices=False): l = [[slice(i, i + n) for i in range(0, m, n)] for m, n in zip(self.shape, shape)] for slices in itertools.product(*l): - coords = Coordinates([self._coords[dim][slc] for dim, slc in zip(self.dims, slices)], **self.properties) + coords = Coordinates( + [self._coords[dim][slc] for dim, slc in zip(self.dims, slices)], validate_crs=False, **self.properties + ) if return_slices: yield coords, slices else: @@ -1256,9 +1259,9 @@ def transpose(self, *dims, **kwargs): self._coords = OrderedDict(zip(dims, coords)) return self else: - return Coordinates(coords, **self.properties) + return Coordinates(coords, validate_crs=False, **self.properties) - def transform(self, crs=None): + def transform(self, crs): """ Transform coordinate dimensions (`lat`, `lon`, `alt`) into a different coordinate reference system. Uses PROJ syntax for coordinate reference systems and units. 
@@ -1280,8 +1283,8 @@ def transform(self, crs=None): c.transform('EPSG:2193') >> Coordinates - lat: ArrayCoordinates1d(lat): Bounds[-9881992.849134896, 29995929.885877542], N[21], ctype['point'] - lon: ArrayCoordinates1d(lon): Bounds[1928928.7360588573, 4187156.434405213], N[21], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[-9881992.849134896, 29995929.885877542], N[21] + lon: ArrayCoordinates1d(lon): Bounds[1928928.7360588573, 4187156.434405213], N[21] Transform stacked coordinates:: @@ -1289,8 +1292,8 @@ def transform(self, crs=None): c.transform('EPSG:2193') >> Coordinates - lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[-9881992.849134896, 29995929.885877542], N[21], ctype['point'] - lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[1928928.7360588573, 4187156.434405213], N[21], ctype['midpoint'] + lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[-9881992.849134896, 29995929.885877542], N[21] + lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[1928928.7360588573, 4187156.434405213], N[21] Transform coordinates using a PROJ4 string:: @@ -1298,14 +1301,13 @@ def transform(self, crs=None): c.transform('+proj=merc +lat_ts=56.5 +ellps=GRS80') >> Coordinates - lat: ArrayCoordinates1d(lat): Bounds[-1847545.541169525, -615848.513723175], N[21], ctype['midpoint'] - lon: ArrayCoordinates1d(lon): Bounds[-614897.0725896168, 614897.0725896184], N[21], ctype['midpoint'] + lat: ArrayCoordinates1d(lat): Bounds[-1847545.541169525, -615848.513723175], N[21] + lon: ArrayCoordinates1d(lon): Bounds[-614897.0725896168, 614897.0725896184], N[21] Parameters ---------- - crs : str, optional + crs : str PROJ4 compatible coordinate reference system string. - Defaults to the current `crs` Returns ------- @@ -1317,48 +1319,49 @@ def transform(self, crs=None): ValueError Coordinates must have both lat and lon dimensions if either is defined """ - - if crs is None: - raise TypeError("transform requires crs argument") - - input_crs = crs - - # use self.crs by default - if crs is None: - crs = self.crs - from_crs = self.CRS to_crs = pyproj.CRS(crs) - # make sure to CRS defines vertical units - if "alt" in self.udims and not to_crs.is_vertical: - raise ValueError("Altitude dimension is defined, but CRS to transform does not contain vertical unit") - # no transform needed if from_crs == to_crs: return deepcopy(self) + # make sure the CRS defines vertical units + if "alt" in self.udims and not to_crs.is_vertical: + raise ValueError("Altitude dimension is defined, but CRS to transform does not contain vertical unit") + + if "lat" in self.udims and "lon" not in self.udims: + raise ValueError("Cannot transform lat coordinates without lon coordinates") + + if "lon" in self.udims and "lat" not in self.udims: + raise ValueError("Cannot transform lon coordinates without lat coordinates") + + if "lat" in self.dims and "lon" in self.dims and abs(self.dims.index("lat") - self.dims.index("lon")) != 1: + raise ValueError("Cannot transform coordinates with nonadjacent lat and lon, transpose first") + + transformer = pyproj.Transformer.from_proj(from_crs, to_crs, always_xy=True) + + # Collect the individual coordinates cs = [c for c in self.values()] - # if lat-lon transform is required, check dims and convert unstacked lat-lon coordinates if necessary - from_spatial = pyproj.CRS(self.crs) - to_spatial = pyproj.CRS(crs) - if from_spatial != to_spatial: - if "lat" in self.dims and "lon" in self.dims: + if "lat" in self.dims and "lon" in self.dims: + # try to do a simplified transform (resulting in unstacked lat-lon coordinates) + tc = 
self._simplified_transform(crs, transformer) + if tc: + cs[self.dims.index("lat")] = tc[0] + cs[self.dims.index("lon")] = tc[1] + + # otherwise convert lat-lon to dependent coordinates + else: ilat = self.dims.index("lat") ilon = self.dims.index("lon") if ilat == ilon - 1: c1, c2 = self["lat"], self["lon"] elif ilon == ilat - 1: c1, c2 = self["lon"], self["lat"] - else: - raise ValueError("Cannot transform coordinates with nonadjacent lat and lon, transpose first") c = DependentCoordinates( - np.meshgrid(c1.coordinates, c2.coordinates, indexing="ij"), - dims=[c1.name, c2.name], - ctypes=[c1.ctype, c2.ctype], - segment_lengths=[c1.segment_lengths, c2.segment_lengths], + np.meshgrid(c1.coordinates, c2.coordinates, indexing="ij"), dims=[c1.name, c2.name] ) # replace 'lat' and 'lon' entries with single 'lat,lon' entry @@ -1367,16 +1370,56 @@ def transform(self, crs=None): cs.pop(i) cs.insert(i, c) - elif "lat" in self.dims: - raise ValueError("Cannot transform lat coordinates without lon coordinates") + # transform + ts = [] + for c in cs: + tc = c._transform(transformer) + if isinstance(tc, list): + ts.extend(tc) + else: + ts.append(tc) - elif "lon" in self.dims: - raise ValueError("Cannot transform lon coordinates without lat coordinates") + return Coordinates(ts, crs=crs, validate_crs=False) - # transform - transformer = pyproj.Transformer.from_proj(from_crs, to_crs, always_xy=True) - ts = [c._transform(transformer) for c in cs] - return Coordinates(ts, crs=input_crs) + def _simplified_transform(self, crs, transformer): + """ Transform coordinates to simple Coordinates1d (instead of DependentCoordinates) if possible """ + + # check if we can simplify the coordinates by transforming a downsampled grid + sample = [np.linspace(self[dim].coordinates[0], self[dim].coordinates[-1], 5) for dim in ["lat", "lon"]] + temp_coords = DependentCoordinates(np.meshgrid(*sample, indexing="ij"), dims=["lat", "lon"]) + t = temp_coords._transform(transformer) + + # if we get DependentCoordinates from the transform, they are not independent + if isinstance(t, DependentCoordinates): + return + + # Great, we CAN simplify the transformed coordinates. 
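The separability check sketched below conveys the idea behind this simplification: transform a small sample grid and see whether the transformed x depends only on lon and the transformed y only on lat. This is a conceptual pyproj/numpy stand-in, not podpac's _simplified_transform.

import numpy as np
import pyproj

def transform_is_separable(lat, lon, from_crs, to_crs, n=5):
    transformer = pyproj.Transformer.from_crs(from_crs, to_crs, always_xy=True)
    LON, LAT = np.meshgrid(np.linspace(lon[0], lon[-1], n), np.linspace(lat[0], lat[-1], n))
    x, y = transformer.transform(LON, LAT)
    # separable when x is constant down each column and y is constant along each row
    return np.allclose(x, x[0:1, :]) and np.allclose(y, y[:, 0:1])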
+ # If they are uniform already, we just need to expand to the full size + # If the are non-uniform, we have to compute the full transformed array + + # lat + if isinstance(t[0], UniformCoordinates1d): + t_lat = clinspace(t[0].coordinates[0], t[0].coordinates[-1], self["lat"].size, name="lat") + else: + # compute the non-uniform coordinates (and simplify to uniform if they are *now* uniform) + temp_coords = StackedCoordinates( + [self["lat"].coordinates, np.full_like(self["lat"].coordinates, self["lon"].coordinates.mean())], + name="lat_lon", + ) + t_lat = temp_coords._transform(transformer)["lat"].simplify() + + # lon + if isinstance(t[1], UniformCoordinates1d): + t_lon = clinspace(t[1].coordinates[0], t[1].coordinates[-1], self["lon"].size, name="lon") + else: + # compute the non-uniform coordinates (and simplify to uniform if they are *now* uniform) + temp_coords = StackedCoordinates( + [self["lon"].coordinates, np.full_like(self["lon"].coordinates, self["lat"].coordinates.mean())], + name="lon_lat", + ) + t_lon = temp_coords._transform(transformer)["lon"].simplify() + + return t_lat, t_lon # ------------------------------------------------------------------------------------------------------------------ # Operators/Magic Methods @@ -1424,7 +1467,7 @@ def merge_dims(coords_list): raise TypeError("Cannot merge '%s' with Coordinates" % type(coords)) if len(coords_list) == 0: - return Coordinates([]) + return Coordinates([], crs=None) # check crs crs = coords_list[0].crs @@ -1433,7 +1476,7 @@ def merge_dims(coords_list): # merge coords = sum([list(coords.values()) for coords in coords_list], []) - return Coordinates(coords, crs=crs) + return Coordinates(coords, crs=crs, validate_crs=False) def concat(coords_list): @@ -1461,7 +1504,7 @@ def concat(coords_list): raise TypeError("Cannot concat '%s' with Coordinates" % type(coords)) if not coords_list: - return Coordinates([]) + return Coordinates([], crs=None) # check crs crs = coords_list[0].crs @@ -1483,7 +1526,7 @@ def concat(coords_list): else: d[dim] = [np.concatenate([d[dim][i], s.coordinates]) for i, s in enumerate(c)] - return Coordinates(list(d.values()), dims=list(d.keys()), crs=crs) + return Coordinates(list(d.values()), dims=list(d.keys()), crs=crs, validate_crs=False) def union(coords_list): diff --git a/podpac/core/coordinates/coordinates1d.py b/podpac/core/coordinates/coordinates1d.py index bd3c85c88..b35e256dc 100644 --- a/podpac/core/coordinates/coordinates1d.py +++ b/podpac/core/coordinates/coordinates1d.py @@ -13,7 +13,7 @@ from podpac.core.utils import ArrayTrait from podpac.core.coordinates.utils import make_coord_value, make_coord_delta, make_coord_delta_array from podpac.core.coordinates.utils import add_coord, divide_delta, lower_precision_time_bounds -from podpac.core.coordinates.utils import Dimension, CoordinateType +from podpac.core.coordinates.utils import Dimension from podpac.core.coordinates.base_coordinates import BaseCoordinates @@ -21,19 +21,8 @@ class Coordinates1d(BaseCoordinates): """ Base class for 1-dimensional coordinates. - Coordinates1d objects contain values and metadata for a single dimension of coordinates. :class:`Coordinates` and + Coordinates1d objects contain values and metadata for a single dimension of coordinates. :class:`podpac.Coordinates` and :class:`StackedCoordinates` use Coordinate1d objects. 
- - The following coordinates types (``ctype``) are supported: - - * 'point': each coordinate represents a single location - * 'left': each coordinate is the left endpoint of its segment - * 'right': each coordinate is the right endpoint of its endpoint - * 'midpoint': segment endpoints are at the midpoints between coordinate values. - - The ``bounds`` are always the low and high coordinate value. For *point* coordinates, the ``area_bounds`` are the - same as the ``bounds``. For *segment* coordinates (left, right, and midpoint), the ``area_bounds`` include the - portion of the segments above and below the ``bounds`. Parameters ---------- @@ -41,72 +30,19 @@ class Coordinates1d(BaseCoordinates): Dimension name, one of 'lat', 'lon', 'time', or 'alt'. coordinates : array, read-only Full array of coordinate values. - ctype : str - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths : array, float, timedelta - When ctype is a segment type, the segment lengths for the coordinates. This may be single coordinate delta for - uniform segment lengths or an array of coordinate deltas corresponding to the coordinates for variable lengths. - + See Also -------- :class:`ArrayCoordinates1d`, :class:`UniformCoordinates1d` """ name = Dimension(allow_none=True) - ctype = CoordinateType(read_only=True) - segment_lengths = tl.Any(read_only=True) - _properties = tl.Set() - def __init__(self, name=None, ctype=None, segment_lengths=None): - """*Do not use.*""" - - if name is not None: - self.name = name - - if ctype is not None: - self.set_trait("ctype", ctype) - - if segment_lengths is not None: - if np.array(segment_lengths).ndim == 0: - segment_lengths = make_coord_delta(segment_lengths) - else: - segment_lengths = make_coord_delta_array(segment_lengths) - segment_lengths.setflags(write=False) - - self.set_trait("segment_lengths", segment_lengths) - - super(Coordinates1d, self).__init__() - - @tl.observe("name", "ctype", "segment_lengths") + @tl.observe("name") def _set_property(self, d): - self._properties.add(d["name"]) - - @tl.validate("segment_lengths") - def _validate_segment_lengths(self, d): - val = d["value"] - - if self.ctype == "point": - if val is not None: - raise TypeError("segment_lengths must be None when ctype='point'") - return None - - if isinstance(val, np.ndarray): - if val.size != self.size: - raise ValueError("coordinates and segment_lengths size mismatch, %d != %d" % (self.size, val.size)) - if not np.issubdtype(val.dtype, np.dtype(self.deltatype).type): - raise ValueError( - "coordinates and segment_lengths dtype mismatch, %s != %s" % (self.dtype, self.deltatype) - ) - - else: - if self.size > 0 and not isinstance(val, self.deltatype): - raise TypeError("coordinates and segment_lengths type mismatch, %s != %s" % (self.deltatype, type(val))) - - if np.any(np.array(val).astype(float) <= 0.0): - raise ValueError("segment_lengths must be positive") - - return val + if d["name"] is not None: + self._properties.add(d["name"]) def _set_name(self, value): # set name if it is not set already, otherwise check that it matches @@ -115,23 +51,17 @@ def _set_name(self, value): elif self.name != value: raise ValueError("Dimension mismatch, %s != %s" % (value, self.name)) - def _set_ctype(self, value): - # only set ctype if it is not set already - if "ctype" not in self._properties: - self.set_trait("ctype", value) - # ------------------------------------------------------------------------------------------------------------------ # standard methods # 
------------------------------------------------------------------------------------------------------------------ def __repr__(self): - return "%s(%s): Bounds[%s, %s], N[%d], ctype['%s']" % ( + return "%s(%s): Bounds[%s, %s], N[%d]" % ( self.__class__.__name__, self.name or "?", self.bounds[0], self.bounds[1], self.size, - self.ctype, ) def __eq__(self, other): @@ -140,11 +70,7 @@ def __eq__(self, other): # defined coordinate properties should match for name in self._properties.union(other._properties): - if name == "segment_lengths": - if not np.all(self.segment_lengths == other.segment_lengths): - return False - - elif getattr(self, name) != getattr(other, name): + if getattr(self, name) != getattr(other, name): return False # shortcuts (not strictly necessary) @@ -211,48 +137,22 @@ def is_uniform(self): raise NotImplementedError @property - def bounds(self): - """ Low and high coordinate bounds. """ - + def start(self): raise NotImplementedError @property - def area_bounds(self): - """ - Low and high coordinate area bounds. - - When ctype != 'point', this includes the portions of the segments beyond the coordinate bounds. - """ - - # point ctypes, just use bounds - if self.ctype == "point": - return self.bounds - - # empty coordinates [np.nan, np.nan] - if self.size == 0: - return self.bounds - - # segment ctypes, calculated - L, H = self.argbounds - lo, hi = self.bounds + def stop(self): + raise NotImplementedError - if not isinstance(self.segment_lengths, np.ndarray): - lo_length = hi_length = self.segment_lengths # uniform segment_lengths - else: - lo_length, hi_length = self.segment_lengths[L], self.segment_lengths[H] + @property + def step(self): + raise NotImplementedError - if self.ctype == "left": - hi = add_coord(hi, hi_length) - elif self.ctype == "right": - lo = add_coord(lo, -lo_length) - elif self.ctype == "midpoint": - lo = add_coord(lo, -divide_delta(lo_length, 2.0)) - hi = add_coord(hi, divide_delta(hi_length, 2.0)) + @property + def bounds(self): + """ Low and high coordinate bounds. """ - # read-only array with the correct dtype - area_bounds = np.array([lo, hi], dtype=self.dtype) - area_bounds.setflags(write=False) - return area_bounds + raise NotImplementedError @property def properties(self): @@ -275,7 +175,7 @@ def _get_definition(self, full=True): @property def _full_properties(self): - return {"name": self.name, "ctype": self.ctype, "segment_lengths": self.segment_lengths} + return {"name": self.name} # ------------------------------------------------------------------------------------------------------------------ # Methods @@ -293,6 +193,71 @@ def copy(self): raise NotImplementedError + def simplify(self): + """ Get the simplified/optimized representation of these coordinates. + + Returns + ------- + simplified : Coordinates1d + simplified version of the coordinates + """ + + raise NotImplementedError + + def get_area_bounds(self, boundary): + """ + Get low and high coordinate area bounds. + + Arguments + --------- + boundary : float, timedelta, array, None + Boundary offsets in this dimension. + + * For a centered uniform boundary (same for every coordinate), use a single positive float or timedelta + offset. This represents the "total segment length" / 2. + * For a uniform boundary (segment or polygon same for every coordinate), use an array of float or + timedelta offsets + * For a fully specified boundary, use an array of boundary arrays (2-D array, N_coords x boundary spec), + one per coordinate. 
The boundary_spec can be a single number, two numbers, or an array of numbers. + * For point coordinates, use None. + + Returns + ------- + low: float, np.datetime64 + low area bound + high: float, np.datetime64 + high area bound + """ + + # point coordinates + if boundary is None: + return self.bounds + + # empty coordinates + if self.size == 0: + return self.bounds + + if np.array(boundary).ndim == 0: + # shortcut for uniform centered boundary + boundary = make_coord_delta(boundary) + lo_offset = -boundary + hi_offset = boundary + elif np.array(boundary).ndim == 1: + # uniform boundary polygon + boundary = make_coord_delta_array(boundary) + lo_offset = min(boundary) + hi_offset = max(boundary) + else: + L, H = self.argbounds + lo_offset = min(make_coord_delta_array(boundary[L])) + hi_offset = max(make_coord_delta_array(boundary[H])) + + lo, hi = self.bounds + lo = add_coord(lo, lo_offset) + hi = add_coord(hi, hi_offset) + + return lo, hi + def _select_empty(self, return_indices): I = [] if return_indices: @@ -371,7 +336,7 @@ def select(self, bounds, return_indices=False, outer=False): "Input bounds do match the coordinates dtype (%s != %s)" % (type(self.bounds[1]), self.dtype) ) - my_bounds = self.area_bounds.copy() + my_bounds = self.bounds # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision if self.dtype == np.datetime64: @@ -392,24 +357,12 @@ def _select(self, bounds, return_indices, outer): raise NotImplementedError def _transform(self, transformer): - from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d - - if self.name == "alt": - # coordinates - _, _, tcoordinates = transformer.transform(np.zeros(self.size), np.zeros(self.size), self.coordinates) + if self.name != "alt": + # this assumes that the transformer does not have a spatial transform + return self.copy() - # segment lengths - properties = self.properties - if self.ctype != "point" and "segment_lengths" in self.properties: - _ = np.zeros_like(self.segment_lengths) - _, _, tsl = transformer.transform(_, _, self.segment_lengths) - properties["segment_lengths"] = tsl - - t = ArrayCoordinates1d(tcoordinates, **properties) - - else: - # this assumes that the transformer has been checked and that if this is a lat or lon dimension, the - # transformer must not have a spatial transform - t = self.copy() + # transform "alt" coordinates + from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d - return t + _, _, tcoordinates = transformer.transform(np.zeros(self.size), np.zeros(self.size), self.coordinates) + return ArrayCoordinates1d(tcoordinates, **self.properties) diff --git a/podpac/core/coordinates/dependent_coordinates.py b/podpac/core/coordinates/dependent_coordinates.py index 439ea03b0..f0434c0c5 100644 --- a/podpac/core/coordinates/dependent_coordinates.py +++ b/podpac/core/coordinates/dependent_coordinates.py @@ -11,12 +11,13 @@ from podpac.core.settings import settings from podpac.core.utils import ArrayTrait, TupleTrait -from podpac.core.coordinates.utils import Dimension, CoordinateType +from podpac.core.coordinates.utils import Dimension from podpac.core.coordinates.utils import make_coord_array, make_coord_value, make_coord_delta from podpac.core.coordinates.base_coordinates import BaseCoordinates from podpac.core.coordinates.coordinates1d import Coordinates1d from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d from podpac.core.coordinates.stacked_coordinates import StackedCoordinates +from 
podpac.core.coordinates.cfunctions import clinspace class DependentCoordinates(BaseCoordinates): @@ -55,12 +56,10 @@ class DependentCoordinates(BaseCoordinates): coordinates = TupleTrait(trait=ArrayTrait(), read_only=True) dims = TupleTrait(trait=Dimension(allow_none=True), read_only=True) idims = TupleTrait(trait=tl.Unicode(), read_only=True) - ctypes = TupleTrait(trait=CoordinateType(), read_only=True) - segment_lengths = TupleTrait(trait=tl.Any(allow_none=True), read_only=True) _properties = tl.Set() - def __init__(self, coordinates, dims=None, ctypes=None, segment_lengths=None): + def __init__(self, coordinates, dims=None): """ Create dependent coordinates manually. You should not need to use this class directly. @@ -70,28 +69,13 @@ def __init__(self, coordinates, dims=None, ctypes=None, segment_lengths=None): tuple of coordinate values for each dimension, each the same shape. dims : tuple (optional) tuple of dimension names ('lat', 'lon', 'time', or 'alt'). - ctype : tuple, str (optional) - tuple of coordinates types ('point', 'left', 'right', or 'midpoint') for each dimension. A single ctype - str can be specified for all dimensions. - segment_lengths : tuple, float, or timedelta (optional) - tuple of segment lengths for each dimension. A single segment length can be specified for all dimensions. - For point coordinates, the segment_lengths must be None; omit if all dimensions are point coordinates. """ coordinates = [np.array(a) for a in coordinates] coordinates = [make_coord_array(a.flatten()).reshape(a.shape) for a in coordinates] self.set_trait("coordinates", coordinates) - self._set_properties(dims, ctypes, segment_lengths) - - def _set_properties(self, dims, ctypes, segment_lengths): if dims is not None: self.set_trait("dims", dims) - if ctypes is not None: - self._set_ctype(ctypes) - if segment_lengths is not None: - self._set_segment_lengths(segment_lengths) - else: - self.segment_lengths # force validation @tl.default("dims") def _default_dims(self): @@ -101,14 +85,6 @@ def _default_dims(self): def _default_idims(self): return tuple("ijkl")[: self.ndims] - @tl.default("ctypes") - def _default_ctype(self): - return tuple("point" for dim in self.dims) - - @tl.default("segment_lengths") - def _default_segment_lengths(self): - return tuple(None for dim in self.dims) - @tl.validate("coordinates") def _validate_coordinates(self, d): val = d["value"] @@ -122,35 +98,22 @@ def _validate_coordinates(self, d): @tl.validate("dims") def _validate_dims(self, d): - val = self._validate_sizes(d) + val = d["value"] + if len(val) != self.ndims: + raise ValueError("dims and coordinates size mismatch, %d != %d" % (len(val), self.ndims)) for i, dim in enumerate(val): if dim is not None and dim in val[:i]: raise ValueError("Duplicate dimension '%s' in stacked coords" % dim) return val - @tl.validate("segment_lengths") - def _validate_segment_lengths(self, d): - val = self._validate_sizes(d) - for i, (segment_lengths, ctype) in enumerate(zip(val, self.ctypes)): - if segment_lengths is None: - if ctype != "point": - raise TypeError("segment_lengths cannot be None for '%s' coordinates at position %d" % (ctype, i)) - else: - if ctype == "point": - raise TypeError("segment_lengths must be None for '%s' coordinates at position %d" % (ctype, i)) - if segment_lengths <= 0.0: - raise ValueError("segment_lengths must be positive at pos %d" % i) + @tl.validate("idims") + def _validate_idims(self, d): + val = d["value"] + if len(val) != self.ndims: + raise ValueError("idims and coordinates size mismatch, %d 
!= %d" % (len(val), self.ndims)) return val - @tl.validate("idims", "ctypes") - def _validate_sizes(self, d): - if len(d["value"]) != self.ndims: - raise ValueError( - "%s and coordinates size mismatch, %d != %d" % (d["trait"].name, len(d["value"]), self.ndims) - ) - return d["value"] - - @tl.observe("dims", "idims", "ctypes", "segment_lengths") + @tl.observe("dims", "idims") def _set_property(self, d): self._properties.add(d["name"]) @@ -162,20 +125,6 @@ def _set_name(self, value): elif self.name != value: raise ValueError("Dimension mismatch, %s != %s" % (value, self.name)) - def _set_ctype(self, value): - # only set ctypes if they are not set already - if "ctypes" not in self._properties: - if isinstance(value, string_types): - self.set_trait("ctypes", tuple(value for dim in self.dims)) - else: - self.set_trait("ctypes", value) - - def _set_segment_lengths(self, value): - if isinstance(value, numbers.Number): - value = tuple(value for dim in self.dims) - value = tuple(make_coord_delta(sl) if sl is not None else None for sl in value) - self.set_trait("segment_lengths", value) - # ------------------------------------------------------------------------------------------------------------------ # Alternate Constructors # ------------------------------------------------------------------------------------------------------------------ @@ -224,16 +173,14 @@ def _rep(self, dim, index=None): dim = "?" # unnamed dimensions c = self.coordinates[index] - ctype = self.ctypes[index] bounds = np.min(c), np.max(c) - return "%s(%s->%s): Bounds[%f, %f], shape%s, ctype[%s]" % ( + return "%s(%s->%s): Bounds[%s, %s], shape%s" % ( self.__class__.__name__, ",".join(self.idims), dim, bounds[0], bounds[1], self.shape, - ctype, ) def __eq__(self, other): @@ -283,10 +230,6 @@ def _properties_at(self, index=None, dim=None): index = self.dims.index(dim) properties = {} properties["name"] = self.dims[index] - if "ctypes" in self._properties: - properties["ctype"] = self.ctypes[index] - if self.ctypes[index] != "point": - properties["segment_lengths"] = self.segment_lengths[index] return properties # ----------------------------------------------------------------------------------------------------------------- @@ -333,13 +276,6 @@ def bounds(self): raise ValueError("Cannot get bounds for DependentCoordinates with un-named dimensions") return {dim: self[dim].bounds for dim in self.dims} - @property - def area_bounds(self): - """:dict: Dictionary of (low, high) coordinates area_bounds in each unstacked dimension""" - if None in self.dims: - raise ValueError("Cannot get area_bounds for DependentCoordinates with un-named dimensions") - return {dim: self[dim].area_bounds for dim in self.dims} - @property def coords(self): """:dict-like: xarray coordinates (container of coordinate arrays)""" @@ -374,7 +310,7 @@ def _get_definition(self, full=True): @property def _full_properties(self): - return {"dims": self.dims, "ctypes": self.ctypes, "segment_lengths": self.segment_lengths} + return {"dims": self.dims} # ------------------------------------------------------------------------------------------------------------------ # Methods @@ -392,6 +328,24 @@ def copy(self): return DependentCoordinates(self.coordinates, **self.properties) + def get_area_bounds(self, boundary): + """Get coordinate area bounds, including boundary information, for each unstacked dimension. + + Arguments + --------- + boundary : dict + dictionary of boundary offsets for each unstacked dimension. Point dimensions can be omitted. 
+ + Returns + ------- + area_bounds : dict + Dictionary of (low, high) coordinates area_bounds in each unstacked dimension + """ + + if None in self.dims: + raise ValueError("Cannot get area_bounds for DependentCoordinates with un-named dimensions") + return {dim: self[dim].get_area_bounds(boundary.get(dim)) for dim in self.dims} + def select(self, bounds, outer=False, return_indices=False): """ Get the coordinate values that are within the given bounds in all dimensions. @@ -460,7 +414,6 @@ def _transform(self, transformer): ilon = self.dims.index("lon") ialt = self.dims.index("alt") - # coordinates lat = coords[ilat].flatten() lon = coords[ilon].flatten() alt = coords[ialt].flatten() @@ -469,48 +422,38 @@ def _transform(self, transformer): coords[ilon] = tlon.reshape(self.shape) coords[ialt] = talt.reshape(self.shape) - # segment lengths - # TODO can we use '+units' here, at least sometimes? - if self.ctypes[ilat] != "point": - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if self.ctypes[ilon] != "point": - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if self.ctypes[ialt] != "point": - _, _, tsl = transformer.transform(0, 0, self.segment_lengths[ialt]) - properties["segment_lengths"][ialt] = tsl - elif "lat" in self.dims and "lon" in self.dims: ilat = self.dims.index("lat") ilon = self.dims.index("lon") - # coordinates lat = coords[ilat].flatten() lon = coords[ilon].flatten() tlon, tlat = transformer.transform(lon, lat) coords[ilat] = tlat.reshape(self.shape) coords[ilon] = tlon.reshape(self.shape) - # segment lengths - # TODO can we use '+units' here, at least sometimes? - if self.ctypes[ilat] != "point": - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if self.ctypes[ilon] != "point": - warnings.warn("transformation of coordinate segment lengths not yet implemented") - elif "alt" in self.dims: ialt = self.dims.index("alt") - # coordinates alt = coords[ialt].flatten() _, _, talt = transformer.transform(np.zeros(self.size), np.zeros(self.size), alt) coords[ialt] = talt.reshape(self.shape) - # segment lengths - if self.ctypes[ialt] != "point": - _, _, tsl = transformer.transform(0, 0, self.segment_lengths[ialt]) - properties["segment_lengths"][ialt] = tsl + return DependentCoordinates(coords, **properties).simplify() + + def simplify(self): + coords = [c.copy() for c in self.coordinates] + slc_start = [slice(0, 1) for d in self.dims] + + for dim in self.dims: + i = self.dims.index(dim) + slc = slc_start.copy() + slc[i] = slice(None) + if dim in ["lat", "lon"] and not np.allclose(coords[i][tuple(slc)], coords[i], atol=1e-7): + return self + coords[i] = ArrayCoordinates1d(coords[i][tuple(slc)].squeeze(), name=dim).simplify() - return DependentCoordinates(coords, **properties) + return coords def transpose(self, *dims, **kwargs): """ @@ -568,7 +511,7 @@ class ArrayCoordinatesNd(ArrayCoordinates1d): """ Partial implementation for internal use. - Provides name, dtype, size, bounds, area_bounds (and others). + Provides name, dtype, size, bounds (and others). Prohibits coords, intersect, select (and others). Used primarily for intersection with DependentCoordinates. @@ -576,7 +519,7 @@ class ArrayCoordinatesNd(ArrayCoordinates1d): coordinates = ArrayTrait(read_only=True) - def __init__(self, coordinates, name=None, ctype=None, segment_lengths=None): + def __init__(self, coordinates, name=None): """ Create shaped array coordinates. You should not need to use this class directly. 
@@ -586,10 +529,6 @@ def __init__(self, coordinates, name=None, ctype=None, segment_lengths=None): coordinate values. name : str, optional Dimension name, one of 'lat', 'lon', 'time', or 'alt'. - ctype : str, optional - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths: float or timedelta, optional - When ctype is a segment type, the segment lengths for the coordinates. """ self.set_trait("coordinates", coordinates) @@ -597,16 +536,15 @@ def __init__(self, coordinates, name=None, ctype=None, segment_lengths=None): self._is_descending = None self._is_uniform = None - Coordinates1d.__init__(self, name=name, ctype=ctype, segment_lengths=segment_lengths) + Coordinates1d.__init__(self, name=name) def __repr__(self): - return "%s(%s): Bounds[%f, %f], shape%s, ctype['%s']" % ( + return "%s(%s): Bounds[%s, %s], shape%s" % ( self.__class__.__name__, self.name or "?", self.bounds[0], self.bounds[1], self.shape, - self.ctype, ) @property diff --git a/podpac/core/coordinates/polar_coordinates.py b/podpac/core/coordinates/polar_coordinates.py index 53e897ac0..6eb371fb8 100644 --- a/podpac/core/coordinates/polar_coordinates.py +++ b/podpac/core/coordinates/polar_coordinates.py @@ -21,7 +21,7 @@ class PolarCoordinates(DependentCoordinates): theta = tl.Instance(Coordinates1d, read_only=True) ndims = 2 - def __init__(self, center, radius, theta=None, theta_size=None, dims=None, ctypes=None, segment_lengths=None): + def __init__(self, center, radius, theta=None, theta_size=None, dims=None): # radius if not isinstance(radius, Coordinates1d): @@ -41,9 +41,8 @@ def __init__(self, center, radius, theta=None, theta_size=None, dims=None, ctype self.set_trait("center", center) self.set_trait("radius", radius) self.set_trait("theta", theta) - - # properties - self._set_properties(dims, ctypes, segment_lengths) + if dims is not None: + self.set_trait("dims", dims) @tl.validate("dims") def _validate_dims(self, d): @@ -107,12 +106,7 @@ def from_definition(cls, d): # ------------------------------------------------------------------------------------------------------------------ def __repr__(self): - if self.ctypes[0] == self.ctypes[1]: - ctypes = "ctype['%s']" % self.ctypes[0] - else: - ctypes = "ctypes[%s]" % ", ".join(self.ctypes) - - return "%s(%s): center%s, shape%s, %s" % (self.__class__.__name__, self.dims, self.center, self.shape, ctypes) + return "%s(%s): center%s, shape%s" % (self.__class__.__name__, self.dims, self.center, self.shape) def __eq__(self, other): if not isinstance(other, PolarCoordinates): diff --git a/podpac/core/coordinates/rotated_coordinates.py b/podpac/core/coordinates/rotated_coordinates.py index a911f400c..b01871b2e 100644 --- a/podpac/core/coordinates/rotated_coordinates.py +++ b/podpac/core/coordinates/rotated_coordinates.py @@ -47,9 +47,7 @@ class RotatedCoordinates(DependentCoordinates): step = ArrayTrait(shape=(2,), dtype=float, read_only=True) ndims = 2 - def __init__( - self, shape=None, theta=None, origin=None, step=None, corner=None, dims=None, ctypes=None, segment_lengths=None - ): + def __init__(self, shape=None, theta=None, origin=None, step=None, corner=None, dims=None): """ Create a grid of rotated coordinates from a `shape`, `theta`, `origin`, and `step` or `corner`. @@ -67,12 +65,6 @@ def __init__( Scaling, ie rotated distance between points in the grid, in each dimension. (corner or step required) dims : tuple (required) tuple of dimension names ('lat', 'lon', 'time', or 'alt'). 
- ctype : tuple, str (optional) - tuple of coordinates types ('point', 'left', 'right', or 'midpoint') for each dimension. A single ctype - str can be specified for all dimensions. - segment_lengths : tuple, float, or timedelta (optional) - tuple of segment lengths for each dimension. A single segment length can be specified for both dimensions. - For point coordinates, the segment_lengths must be None; omit if all dimensions are point coordinates. """ self.set_trait("shape", shape) @@ -84,8 +76,8 @@ def __init__( d = np.array(a * corner) - np.array(a * origin) step = d / np.array([shape[0] - 1, shape[1] - 1]) self.set_trait("step", step) - - self._set_properties(dims, ctypes, segment_lengths) + if dims is not None: + self.set_trait("dims", dims) @tl.validate("dims") def _validate_dims(self, d): @@ -114,14 +106,14 @@ def _validate_step(self, d): # ------------------------------------------------------------------------------------------------------------------ @classmethod - def from_geotransform(cls, geotransform, shape, dims=None, ctypes=None, segment_lengths=None): + def from_geotransform(cls, geotransform, shape, dims=None): affine = rasterio.Affine.from_gdal(*geotransform) origin = affine.f, affine.c deg = affine.rotation_angle scale = ~affine.rotation(deg) * ~affine.translation(*origin) * affine step = np.array([scale.e, scale.a]) origin = affine.f + step[0] / 2, affine.c + step[1] / 2 - return cls(shape, np.deg2rad(deg), origin, step, dims=dims, ctypes=ctypes, segment_lengths=segment_lengths) + return cls(shape, np.deg2rad(deg), origin, step, dims=dims) @classmethod def from_definition(cls, d): @@ -165,19 +157,13 @@ def from_definition(cls, d): # ------------------------------------------------------------------------------------------------------------------ def __repr__(self): - if self.ctypes[0] == self.ctypes[1]: - ctypes = "ctype['%s']" % self.ctypes[0] - else: - ctypes = "ctypes[%s]" % ", ".join(self.ctypes) - - return "%s(%s): Origin%s, Corner%s, rad[%.4f], shape%s, %s" % ( + return "%s(%s): Origin%s, Corner%s, rad[%.4f], shape%s" % ( self.__class__.__name__, self.dims, self.origin, self.corner, self.theta, self.shape, - ctypes, ) def __eq__(self, other): @@ -261,12 +247,6 @@ def properties(self): """:dict: Dictionary of the coordinate properties. """ return {key: getattr(self, key) for key in self._properties} - @property - def area_bounds(self): - """:dict: Dictionary of (low, high) coordinates area_bounds in each unstacked dimension""" - # TODO this is not accurate, the segment lengths need to be rotated - return super(RotatedCoordinates, self).area_bounds - def _get_definition(self, full=True): d = OrderedDict() d["dims"] = self.dims @@ -292,6 +272,24 @@ def copy(self): """ return RotatedCoordinates(self.shape, self.theta, self.origin, self.step, **self.properties) + def get_area_bounds(self, boundary): + """Get coordinate area bounds, including boundary information, for each unstacked dimension. + + Arguments + --------- + boundary : dict + dictionary of boundary offsets for each unstacked dimension. Point dimensions can be omitted. 
+
+        Returns
+        -------
+        area_bounds : dict
+            Dictionary of (low, high) coordinates area_bounds in each unstacked dimension
+        """
+
+        # TODO the boundary offsets need to be rotated
+        warnings.warn("RotatedCoordinates area_bounds are not yet correctly implemented.")
+        return super(RotatedCoordinates, self).get_area_bounds(boundary)
+
    def select(self, bounds, outer=False, return_indices=False):
        """
        Get the coordinate values that are within the given bounds in all dimensions.
diff --git a/podpac/core/coordinates/stacked_coordinates.py b/podpac/core/coordinates/stacked_coordinates.py
index 5ae6c8b16..3d5b253af 100644
--- a/podpac/core/coordinates/stacked_coordinates.py
+++ b/podpac/core/coordinates/stacked_coordinates.py
@@ -32,9 +32,9 @@ class StackedCoordinates(BaseCoordinates):
    >>> time = ['2018-01-01', '2018-01-02']
    >>> podpac.Coordinates([[lat, lon], time], dims=['lat_lon', 'time'])
    Coordinates
-        lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint']
-        lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3], ctype['midpoint']
-        time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2], ctype['midpoint']
+        lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3]
+        lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3]
+        time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2]

    For convenience, you can also create uniformly-spaced stacked coordinates using :class:`clinspace`::

@@ -42,9 +42,9 @@ class StackedCoordinates(BaseCoordinates):
    >>> time = ['2018-01-01', '2018-01-02']
    >>> podpac.Coordinates([lat_lon, time], dims=['lat_lon', 'time'])
    Coordinates
-        lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3], ctype['midpoint']
-        lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3], ctype['midpoint']
-        time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2], ctype['midpoint']
+        lat_lon[lat]: ArrayCoordinates1d(lat): Bounds[0.0, 2.0], N[3]
+        lat_lon[lon]: ArrayCoordinates1d(lon): Bounds[10.0, 30.0], N[3]
+        time: ArrayCoordinates1d(time): Bounds[2018-01-01, 2018-01-02], N[2]

    Parameters
    ----------
@@ -61,7 +61,7 @@ class StackedCoordinates(BaseCoordinates):

    _coords = tl.List(trait=tl.Instance(Coordinates1d), read_only=True)

-    def __init__(self, coords, name=None, dims=None, ctype=None):
+    def __init__(self, coords, name=None, dims=None):
        """
        Initialize a multidimensional coords object.
@@ -69,8 +69,6 @@
        ----------
        coords : list, :class:`StackedCoordinates`
            Coordinate values in a list, or a StackedCoordinates object to copy.
-        ctype : str, optional
-            Default coordinates type.
See Also -------- @@ -96,8 +94,6 @@ def __init__(self, coords, name=None, dims=None, ctype=None): self._set_dims(dims) if name is not None: self._set_name(name) - if ctype is not None: - self._set_ctype(ctype) # finalize super(StackedCoordinates, self).__init__() @@ -140,16 +136,12 @@ def _set_dims(self, dims): continue c._set_name(dim) - def _set_ctype(self, value): - for c in self._coords: - c._set_ctype(value) - # ------------------------------------------------------------------------------------------------------------------ # Alternate constructors # ------------------------------------------------------------------------------------------------------------------ @classmethod - def from_xarray(cls, xcoords, ctype=None): + def from_xarray(cls, xcoords): """ Convert an xarray coord to StackedCoordinates @@ -157,8 +149,6 @@ def from_xarray(cls, xcoords, ctype=None): ---------- xcoords : DataArrayCoordinates xarray coords attribute to convert - ctype : str, optional - Default coordinates type. Returns ------- @@ -168,7 +158,7 @@ def from_xarray(cls, xcoords, ctype=None): dims = xcoords.indexes[xcoords.dims[0]].names coords = [ArrayCoordinates1d.from_xarray(xcoords[dims]) for dims in dims] - return cls(coords, ctype=ctype) + return cls(coords) @classmethod def from_definition(cls, d): @@ -312,13 +302,6 @@ def bounds(self): raise ValueError("Cannot get bounds for StackedCoordinates with un-named dimensions") return {dim: self[dim].bounds for dim in self.udims} - @property - def area_bounds(self): - """:dict: Dictionary of (low, high) coordinates area_bounds in each dimension""" - if None in self.dims: - raise ValueError("Cannot get area_bounds for StackedCoordinates with un-named dimensions") - return {dim: self[dim].area_bounds for dim in self.udims} - @property def coordinates(self): """:pandas.MultiIndex: MultiIndex of stacked coordinates values.""" @@ -366,7 +349,25 @@ def copy(self): return StackedCoordinates(self._coords) - def select(self, bounds, return_indices=False, outer=False): + def get_area_bounds(self, boundary): + """Get coordinate area bounds, including boundary information, for each unstacked dimension. + + Arguments + --------- + boundary : dict + dictionary of boundary offsets for each unstacked dimension. Point dimensions can be omitted. + + Returns + ------- + area_bounds : dict + Dictionary of (low, high) coordinates area_bounds in each unstacked dimension + """ + + if None in self.dims: + raise ValueError("Cannot get area_bounds for StackedCoordinates with un-named dimensions") + return {dim: self[dim].get_area_bounds(boundary.get(dim)) for dim in self.dims} + + def select(self, bounds, outer=False, return_indices=False): """ Get the coordinate values that are within the given bounds in all dimensions. @@ -425,57 +426,33 @@ def _transform(self, transformer): ilon = self.dims.index("lon") ialt = self.dims.index("alt") - # coordinates lat = coords[ilat] lon = coords[ilon] alt = coords[ialt] tlon, tlat, talt = transformer.transform(lon.coordinates, lat.coordinates, alt.coordinates) - coords[ilat].set_trait("coordinates", tlat) - coords[ilon].set_trait("coordinates", tlon) - coords[ialt].set_trait("coordinates", talt) - - # segment lengths - # TODO can we use the proj4 '+units' here, at least sometimes? 
- if lat.ctype != "point" and "segment_lengths" in lat.properties: - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if lon.ctype != "point" and "segment_lengths" in lon.properties: - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if alt.ctype != "point" and "segment_lengths" in lon.properties: - sl = alt.segment_lengths - _, _, tsl = transformer.transform(np.zeros_like(sl), np.zeros_like(sl), sl) - coords[ialt].set_trait("segment_lengths", tsl) + + coords[ilat] = ArrayCoordinates1d(tlat, "lat").simplify() + coords[ilon] = ArrayCoordinates1d(tlon, "lon").simplify() + coords[ialt] = ArrayCoordinates1d(talt, "alt").simplify() elif "lat" in self.dims and "lon" in self.dims: ilat = self.dims.index("lat") ilon = self.dims.index("lon") - # coordinates lat = coords[ilat] lon = coords[ilon] tlon, tlat = transformer.transform(lon.coordinates, lat.coordinates) - coords[ilat].set_trait("coordinates", tlat) - coords[ilon].set_trait("coordinates", tlon) - # segment lengths - # TODO can we use proj4 '+units' here, at least sometimes? - if lat.ctype != "point" and "segment_lengths" in lat.properties: - warnings.warn("transformation of coordinate segment lengths not yet implemented") - if lon.ctype != "point" and "segment_lengths" in lon.properties: - warnings.warn("transformation of coordinate segment lengths not yet implemented") + coords[ilat] = ArrayCoordinates1d(tlat, "lat").simplify() + coords[ilon] = ArrayCoordinates1d(tlon, "lon").simplify() elif "alt" in self.dims: ialt = self.dims.index("alt") - # coordinates alt = coords[ialt] _, _, talt = transformer.transform(np.zeros(self.size), np.zeros(self.size), alt.coordinates) - coords[ialt].set_trait("coordinates", talt) - # segment lengths - if alt.ctype != "point" and "segment_lengths" in lon.properties: - sl = alt.segment_lengths - _, _, tsl = transformer.transform(np.zeros_like(sl), np.zeros_like(sl), sl) - coords[ialt].set_trait("segment_lengths", tsl) + coords[ialt] = ArrayCoordinates1d(talt, "alt").simplify() return StackedCoordinates(coords) diff --git a/podpac/core/coordinates/test/test_array_coordinates1d.py b/podpac/core/coordinates/test/test_array_coordinates1d.py index 2892232b4..dce5a8157 100644 --- a/podpac/core/coordinates/test/test_array_coordinates1d.py +++ b/podpac/core/coordinates/test/test_array_coordinates1d.py @@ -8,6 +8,7 @@ from numpy.testing import assert_equal import podpac +from podpac.core.coordinates.utils import make_coord_array from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d from podpac.core.coordinates.stacked_coordinates import StackedCoordinates @@ -23,10 +24,12 @@ def test_empty(self): assert c.size == 0 assert c.shape == (0,) assert c.dtype is None - assert c.ctype == "point" assert c.is_monotonic is None assert c.is_descending is None assert c.is_uniform is None + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) def test_numerical_singleton(self): @@ -37,10 +40,12 @@ def test_numerical_singleton(self): assert c.size == 1 assert c.shape == (1,) assert c.dtype == float - assert c.ctype == "point" assert c.is_monotonic == True assert c.is_descending is None - assert c.is_uniform == True + assert c.is_uniform is None + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) def test_numerical_array(self): @@ -53,10 +58,12 @@ def test_numerical_array(self): assert c.size == 4 assert c.shape 
== (4,) assert c.dtype == float - assert c.ctype == "point" assert c.is_monotonic == False - assert c.is_descending is None + assert c.is_descending is False assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # sorted ascending @@ -68,10 +75,12 @@ def test_numerical_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == float - assert c.ctype == "midpoint" assert c.is_monotonic == True assert c.is_descending == False assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # sorted descending @@ -83,10 +92,12 @@ def test_numerical_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == float - assert c.ctype == "midpoint" assert c.is_monotonic == True assert c.is_descending == True assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # uniform ascending @@ -98,10 +109,12 @@ def test_numerical_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == float - assert c.ctype == "midpoint" assert c.is_monotonic == True assert c.is_descending == False assert c.is_uniform == True + assert c.start == 0.0 + assert c.stop == 6.0 + assert c.step == 2 repr(c) # uniform descending @@ -113,10 +126,12 @@ def test_numerical_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == float - assert c.ctype == "midpoint" assert c.is_monotonic == True assert c.is_descending == True assert c.is_uniform == True + assert c.start == 6.0 + assert c.stop == 0.0 + assert c.step == -2 repr(c) def test_datetime_singleton(self): @@ -127,26 +142,30 @@ def test_datetime_singleton(self): assert c.size == 1 assert c.shape == (1,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert c.is_monotonic == True assert c.is_descending is None - assert c.is_uniform == True + assert c.is_uniform is None + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) def test_datetime_array(self): # unsorted values = ["2018-01-01", "2019-01-01", "2017-01-01", "2018-01-02"] a = np.array(values).astype(np.datetime64) - c = ArrayCoordinates1d(values, ctype="point") + c = ArrayCoordinates1d(values) assert_equal(c.coordinates, a) assert_equal(c.bounds, np.array(["2017-01-01", "2019-01-01"]).astype(np.datetime64)) assert c.size == 4 assert c.shape == (4,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert c.is_monotonic == False - assert c.is_descending is None + assert c.is_descending == False assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # sorted ascending @@ -158,10 +177,12 @@ def test_datetime_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert c.is_monotonic == True assert c.is_descending == False assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # sorted descending @@ -173,10 +194,12 @@ def test_datetime_array(self): assert c.size == 4 assert c.shape == (4,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert c.is_monotonic == True assert c.is_descending == True assert c.is_uniform == False + assert c.start is None + assert c.stop is None + assert c.step is None repr(c) # uniform ascending @@ -188,10 +211,12 @@ def test_datetime_array(self): assert c.size == 3 assert c.shape == (3,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert 
c.is_monotonic == True assert c.is_descending == False assert c.is_uniform == True + assert c.start == np.datetime64("2017-01-01") + assert c.stop == np.datetime64("2019-01-01") + assert c.step == np.timedelta64(365, "D") repr(c) # uniform descending @@ -203,10 +228,12 @@ def test_datetime_array(self): assert c.size == 3 assert c.shape == (3,) assert c.dtype == np.datetime64 - assert c.ctype == "point" assert c.is_monotonic == True assert c.is_descending == True assert c.is_uniform == True + assert c.start == np.datetime64("2019-01-01") + assert c.stop == np.datetime64("2017-01-01") + assert c.step == np.timedelta64(-365, "D") repr(c) def test_invalid_coords(self): @@ -219,16 +246,14 @@ def test_invalid_coords(self): def test_from_xarray(self): # numerical x = xr.DataArray([0, 1, 2], name="lat") - c = ArrayCoordinates1d.from_xarray(x, ctype="point") + c = ArrayCoordinates1d.from_xarray(x) assert c.name == "lat" - assert c.ctype == "point" assert_equal(c.coordinates, x.data) # datetime x = xr.DataArray([np.datetime64("2018-01-01"), np.datetime64("2018-01-02")], name="time") - c = ArrayCoordinates1d.from_xarray(x, ctype="point") + c = ArrayCoordinates1d.from_xarray(x) assert c.name == "time" - assert c.ctype == "point" assert_equal(c.coordinates, x.data) # unnamed @@ -242,11 +267,6 @@ def test_copy(self): assert c is not c2 assert c == c2 - c = ArrayCoordinates1d([1, 2, 3], segment_lengths=0.5) - c2 = c.copy() - assert c is not c2 - assert c == c2 - def test_name(self): ArrayCoordinates1d([]) ArrayCoordinates1d([], name="lat") @@ -278,135 +298,6 @@ def test_set_name(self): with pytest.raises(tl.TraitError): c._set_name("depth") - def test_set_ctype(self): - # set if not already set - c = ArrayCoordinates1d([]) - c._set_ctype("point") - assert c.ctype == "point" - - # ignore if set already - c = ArrayCoordinates1d([], ctype="point") - c._set_ctype("point") - assert c.ctype == "point" - - c._set_ctype("left") - assert c.ctype == "point" - - # invalid ctype - c = ArrayCoordinates1d([]) - with pytest.raises(tl.TraitError): - c._set_ctype("ABC") - - def test_segment_lengths_point(self): - with pytest.raises(TypeError, match="segment_lengths must be None"): - ArrayCoordinates1d([1, 2], ctype="point", segment_lengths=1.0) - - with pytest.raises(TypeError, match="segment_lengths must be None"): - ArrayCoordinates1d([1, 2], ctype="point", segment_lengths=[1.0, 1.0]) - - def test_segment_lengths_empty(self): - c = ArrayCoordinates1d([]) - assert c.segment_lengths is None - - def test_segment_lengths_delta(self): - # numeric - c = ArrayCoordinates1d([1, 2, 3], ctype="midpoint", segment_lengths=1.0) - assert c.segment_lengths == 1.0 - - # datetime - c = ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="midpoint", segment_lengths="1,D") - assert c.segment_lengths == np.timedelta64(1, "D") - - # mismatch - with pytest.raises(TypeError, match="coordinates and segment_lengths type mismatch"): - ArrayCoordinates1d([1, 2, 3], ctype="midpoint", segment_lengths="1,D") - - with pytest.raises(TypeError, match="coordinates and segment_lengths type mismatch"): - ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="midpoint", segment_lengths=1.0) - - def test_segment_lengths_array(self): - # numeric - c = ArrayCoordinates1d([1, 2, 3], ctype="midpoint", segment_lengths=[1.0, 1.0, 1.0]) - assert_equal(c.segment_lengths, np.array([1.0, 1.0, 1.0])) - - # datetime - c = ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="midpoint", segment_lengths=["1,D", "1,D"]) - assert_equal(c.segment_lengths, 
np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")])) - - # mismatch - with pytest.raises(ValueError, match="coordinates and segment_lengths size mismatch"): - ArrayCoordinates1d([1, 2, 3], ctype="midpoint", segment_lengths=[1.0, 1.0]) - - with pytest.raises(ValueError, match="coordinates and segment_lengths dtype mismatch"): - ArrayCoordinates1d([1, 2, 3], ctype="midpoint", segment_lengths=["1,D", "1,D", "1,D"]) - - with pytest.raises(ValueError, match="coordinates and segment_lengths dtype mismatch"): - ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="midpoint", segment_lengths=[1.0, 1.0]) - - def test_segment_lengths_inferred(self): - # no segment lengths for point coordinates - c = ArrayCoordinates1d([1, 2, 3], ctype="point") - assert c.segment_lengths is None - - c = ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="point") - assert c.segment_lengths is None - - # no segment lengths for empty segment coordinates - c = ArrayCoordinates1d([], ctype="midpoint") - assert c.segment_lengths is None - - # segment lengths required for datetime segment coordinates - with pytest.raises(TypeError, match="segment_lengths required"): - ArrayCoordinates1d(["2018-01-01", "2018-01-02"], ctype="midpoint") - - # segment lengths required for singleton segment coordinates - with pytest.raises(TypeError, match="segment_lengths required"): - ArrayCoordinates1d([1], ctype="midpoint") - - # segment lengths required for nonmonotonic segment coordinates - with pytest.raises(TypeError, match="segment_lengths required"): - ArrayCoordinates1d([1, 4, 2], ctype="midpoint") - - values = [1, 2, 4, 7] - - # left - c = ArrayCoordinates1d(values, ctype="left") - assert_equal(c.segment_lengths, [1.0, 2.0, 3.0, 3.0]) - - c = ArrayCoordinates1d(values[::-1], ctype="left") - assert_equal(c.segment_lengths, [3.0, 3.0, 2.0, 1.0]) - - # right - c = ArrayCoordinates1d(values, ctype="right") - assert_equal(c.segment_lengths, [1.0, 1.0, 2.0, 3.0]) - - c = ArrayCoordinates1d(values[::-1], ctype="right") - assert_equal(c.segment_lengths, [3.0, 2.0, 1.0, 1.0]) - - # midpoint - c = ArrayCoordinates1d(values, ctype="midpoint") - assert_equal(c.segment_lengths, [1.0, 1.5, 2.5, 3.0]) - - c = ArrayCoordinates1d(values[::-1], ctype="midpoint") - assert_equal(c.segment_lengths, [3, 2.5, 1.5, 1.0]) - - # uniform coordinates should use a single segment length - c = ArrayCoordinates1d([1.0, 2.0, 3.0], ctype="midpoint") - assert c.segment_lengths == 1.0 - - def test_segment_lengths_positive(self): - with pytest.raises(ValueError, match="segment_lengths must be positive"): - ArrayCoordinates1d([0, 1, 2], segment_lengths=[1.0, 0.0, 1.0]) - - with pytest.raises(ValueError, match="segment_lengths must be positive"): - ArrayCoordinates1d([0, 1, 2], segment_lengths=[1.0, -1.0, 1.0]) - - with pytest.raises(ValueError, match="segment_lengths must be positive"): - ArrayCoordinates1d([0, 1, 2], segment_lengths=0.0) - - with pytest.raises(ValueError, match="segment_lengths must be positive"): - ArrayCoordinates1d([0, 1, 2], segment_lengths=-1.0) - class TestArrayCoordinatesEq(object): def test_eq_type(self): @@ -469,43 +360,14 @@ def test_eq_name(self): c4.name = "lat" assert c1 == c4 - def test_eq_ctype(self): - c1 = ArrayCoordinates1d([0, 1, 3]) - c2 = ArrayCoordinates1d([0, 1, 3], ctype="midpoint") - c3 = ArrayCoordinates1d([0, 1, 3], ctype="left") - - assert c1 == c2 - assert c1 != c3 - assert c2 != c3 - - def test_eq_segment_lengths(self): - c1 = ArrayCoordinates1d([0, 1, 3], segment_lengths=[1, 1, 1]) - c2 = 
ArrayCoordinates1d([0, 1, 3], segment_lengths=[1, 1, 1]) - c3 = ArrayCoordinates1d([0, 1, 3], segment_lengths=[1, 2, 3]) - - assert c1 == c2 - assert c1 != c3 - - c1 = ArrayCoordinates1d([0, 1, 3], segment_lengths=1) - c2 = ArrayCoordinates1d([0, 1, 3], segment_lengths=1) - c3 = ArrayCoordinates1d([0, 1, 3], segment_lengths=2) - - assert c1 == c2 - assert c1 != c3 - - # mixed segment_lengths type - c1 = ArrayCoordinates1d([0, 1, 3], segment_lengths=[1, 1, 1]) - c2 = ArrayCoordinates1d([0, 1, 3], segment_lengths=1) - assert c1 == c2 - class TestArrayCoordinatesSerialization(object): def test_definition(self): # numerical - c = ArrayCoordinates1d([0, 1, 2], name="lat", ctype="point") + c = ArrayCoordinates1d([0, 1, 2], name="lat") d = c.definition assert isinstance(d, dict) - assert set(d.keys()) == {"values", "name", "ctype"} + assert set(d.keys()) == {"values", "name"} json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable c2 = ArrayCoordinates1d.from_definition(d) # test from_definition assert c2 == c @@ -519,20 +381,6 @@ def test_definition(self): c2 = ArrayCoordinates1d.from_definition(d) # test from_definition assert c2 == c - def test_definition_segment_lengths(self): - c = ArrayCoordinates1d([0, 1, 2], segment_lengths=0.5) - d = c.definition - assert isinstance(d, dict) - assert set(d.keys()) == {"values", "segment_lengths"} - json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable - c2 = ArrayCoordinates1d.from_definition(d) # test from_definition - assert c2 == c - - def test_invalid_definition(self): - d = {"coordinates": [0, 1, 2]} - with pytest.raises(ValueError, match='ArrayCoordinates1d definition requires "values" property'): - ArrayCoordinates1d.from_definition(d) - class TestArrayCoordinatesProperties(object): def test_dims(self): @@ -551,102 +399,6 @@ def test_dims(self): with pytest.raises(TypeError, match="cannot access dims property of unnamed Coordinates1d"): c.idims - def test_area_bounds_point(self): - # numerical - values = np.array([0.0, 1.0, 4.0, 6.0]) - c = ArrayCoordinates1d(values, ctype="point") - assert_equal(c.area_bounds, [0.0, 6.0]) - c = ArrayCoordinates1d(values[::-1], ctype="point") - assert_equal(c.area_bounds, [0.0, 6.0]) - c = ArrayCoordinates1d(values[[1, 2, 0, 3]], ctype="point") - assert_equal(c.area_bounds, [0.0, 6.0]) - - # datetime - values = np.array(["2017-01-01", "2017-01-02", "2018-01-01", "2019-01-01"]).astype(np.datetime64) - c = ArrayCoordinates1d(values, ctype="point") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values[::-1], ctype="point") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values[[1, 2, 0, 3]], ctype="point") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-01-01"]).astype(np.datetime64)) - - def test_area_bounds_empty(self): - c = ArrayCoordinates1d([], ctype="midpoint") - assert np.all(np.isnan(c.area_bounds)) - - def test_area_bounds_left(self): - # numerical - values = np.array([0.0, 1.0, 4.0, 6.0]) - c = ArrayCoordinates1d(values, ctype="left") - assert_equal(c.area_bounds, [0.0, 8.0]) - c = ArrayCoordinates1d(values[::-1], ctype="left") - assert_equal(c.area_bounds, [0.0, 8.0]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="left", segment_lengths=2.0) - assert_equal(c.area_bounds, [0.0, 8.0]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="left", segment_lengths=[1.0, 1.0, 2.0, 1.0]) - assert_equal(c.area_bounds, [0.0, 8.0]) - 
- # datetime - values = np.array(["2017-01-02", "2017-01-01", "2019-01-01", "2018-01-01"]).astype(np.datetime64) - c = ArrayCoordinates1d(values, ctype="left", segment_lengths="1,D") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-01-02"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="left", segment_lengths="1,M") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-02-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="left", segment_lengths="1,Y") - assert_equal(c.area_bounds, np.array(["2017-01-01", "2020-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="left", segment_lengths=["2,D", "2,D", "1,D", "2,D"]) - assert_equal(c.area_bounds, np.array(["2017-01-01", "2019-01-02"]).astype(np.datetime64)) - - def test_area_bounds_right(self): - # numerical - values = np.array([0.0, 1.0, 4.0, 6.0]) - c = ArrayCoordinates1d(values, ctype="right") - assert_equal(c.area_bounds, [-1.0, 6.0]) - c = ArrayCoordinates1d(values[::-1], ctype="right") - assert_equal(c.area_bounds, [-1.0, 6.0]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="right", segment_lengths=1.0) - assert_equal(c.area_bounds, [-1.0, 6.0]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="right", segment_lengths=[3.0, 1.0, 3.0, 3.0]) - assert_equal(c.area_bounds, [-1.0, 6.0]) - - # datetime - values = np.array(["2017-01-02", "2017-01-01", "2019-01-01", "2018-01-01"]).astype(np.datetime64) - c = ArrayCoordinates1d(values, ctype="right", segment_lengths="1,D") - assert_equal(c.area_bounds, np.array(["2016-12-31", "2019-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="right", segment_lengths="1,M") - assert_equal(c.area_bounds, np.array(["2016-12-01", "2019-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="right", segment_lengths="1,Y") - assert_equal(c.area_bounds, np.array(["2016-01-01", "2019-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="right", segment_lengths=["2,D", "1,D", "2,D", "2,D"]) - assert_equal(c.area_bounds, np.array(["2016-12-31", "2019-01-01"]).astype(np.datetime64)) - - def test_area_bounds_midpoint(self): - # numerical - values = np.array([0.0, 1.0, 4.0, 6.0]) - c = ArrayCoordinates1d(values, ctype="midpoint") - assert_equal(c.area_bounds, [-0.5, 7.0]) - c = ArrayCoordinates1d(values[::-1], ctype="midpoint") - assert_equal(c.area_bounds, [-0.5, 7.0]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="midpoint", segment_lengths=1.0) - assert_equal(c.area_bounds, [-0.5, 6.5]) - c = ArrayCoordinates1d(values[[1, 0, 3, 2]], ctype="midpoint", segment_lengths=[1.0, 2.0, 3.0, 4.0]) - assert_equal(c.area_bounds, [-1.0, 7.5]) - - # datetime - values = np.array(["2017-01-02", "2017-01-01", "2019-01-01", "2018-01-01"]).astype(np.datetime64) - c = ArrayCoordinates1d(values, ctype="midpoint", segment_lengths="2,D") - assert_equal(c.area_bounds, np.array(["2016-12-31", "2019-01-02"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="midpoint", segment_lengths="2,M") - assert_equal(c.area_bounds, np.array(["2016-12-01", "2019-02-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="midpoint", segment_lengths="2,Y") - assert_equal(c.area_bounds, np.array(["2016-01-01", "2020-01-01"]).astype(np.datetime64)) - c = ArrayCoordinates1d(values, ctype="midpoint", segment_lengths=["2,D", "4,D", "6,D", "8,D"]) - assert_equal(c.area_bounds, np.array(["2016-12-30", "2019-01-04"]).astype(np.datetime64)) - - # datetime divide_delta - c = ArrayCoordinates1d(values, 
ctype="midpoint", segment_lengths="1,D") - assert_equal(c.area_bounds, np.array(["2016-12-31 12", "2019-01-01 12"]).astype(np.datetime64)) - def test_properties(self): c = ArrayCoordinates1d([]) assert isinstance(c.properties, dict) @@ -656,14 +408,6 @@ def test_properties(self): assert isinstance(c.properties, dict) assert set(c.properties) == {"name"} - c = ArrayCoordinates1d([], ctype="point") - assert isinstance(c.properties, dict) - assert set(c.properties) == {"ctype"} - - c = ArrayCoordinates1d([1, 2], segment_lengths=1) - assert isinstance(c.properties, dict) - assert set(c.properties) == {"segment_lengths"} - def test_coords(self): c = ArrayCoordinates1d([1, 2], name="lat") coords = c.coords @@ -681,7 +425,7 @@ def test_len(self): assert len(c) == 3 def test_index(self): - c = ArrayCoordinates1d([20, 50, 60, 90, 40, 10], name="lat", ctype="point") + c = ArrayCoordinates1d([20, 50, 60, 90, 40, 10], name="lat") # int c2 = c[2] @@ -742,41 +486,66 @@ def test_index(self): with pytest.raises(IndexError): c[10] - def test_index_segment_lengths(self): - # array segment_lengths - c = ArrayCoordinates1d([1, 2, 4, 5], segment_lengths=[0.1, 0.2, 0.3, 0.4]) - c2 = c[1] - assert c2.segment_lengths == 0.2 or np.array_equal(c2.segment_lengths, [0.2]) +class TestArrayCoordinatesAreaBounds(object): + def test_get_area_bounds_numerical(self): + values = np.array([0.0, 1.0, 4.0, 6.0]) + c = ArrayCoordinates1d(values) - c2 = c[1:3] - assert_equal(c2.segment_lengths, [0.2, 0.3]) + # point + area_bounds = c.get_area_bounds(None) + assert_equal(area_bounds, [0.0, 6.0]) - c2 = c[[2, 1]] - assert_equal(c2.segment_lengths, [0.3, 0.2]) + # uniform + area_bounds = c.get_area_bounds(0.5) + assert_equal(area_bounds, [-0.5, 6.5]) - c2 = c[[]] - assert_equal(c2.segment_lengths, []) + # segment + area_bounds = c.get_area_bounds([-0.2, 0.7]) + assert_equal(area_bounds, [-0.2, 6.7]) - # uniform segment_lengths - c = ArrayCoordinates1d([1, 2, 4, 5], segment_lengths=0.5) + # polygon (i.e. 
there would be corresponding offets for another dimension) + area_bounds = c.get_area_bounds([-0.2, -0.5, 0.7, 0.5]) + assert_equal(area_bounds, [-0.5, 6.7]) + + # boundaries + area_bounds = c.get_area_bounds([[-0.4, 0.1], [-0.3, 0.2], [-0.2, 0.3], [-0.1, 0.4]]) + assert_equal(area_bounds, [-0.4, 6.4]) + + def test_get_area_bounds_datetime(self): + values = make_coord_array(["2017-01-02", "2017-01-01", "2019-01-01", "2018-01-01"]) + c = ArrayCoordinates1d(values) - c2 = c[1] - assert c2.segment_lengths == 0.5 + # point + area_bounds = c.get_area_bounds(None) + assert_equal(area_bounds, make_coord_array(["2017-01-01", "2019-01-01"])) - c2 = c[1:3] - assert c2.segment_lengths == 0.5 + # uniform + area_bounds = c.get_area_bounds("1,D") + assert_equal(area_bounds, make_coord_array(["2016-12-31", "2019-01-02"])) - c2 = c[[2, 1]] - assert c2.segment_lengths == 0.5 + area_bounds = c.get_area_bounds("1,M") + assert_equal(area_bounds, make_coord_array(["2016-12-01", "2019-02-01"])) - c2 = c[[]] - assert c2.segment_lengths == 0.5 + area_bounds = c.get_area_bounds("1,Y") + assert_equal(area_bounds, make_coord_array(["2016-01-01", "2020-01-01"])) + + # segment + area_bounds = c.get_area_bounds(["0,h", "12,h"]) + assert_equal(area_bounds, make_coord_array(["2017-01-01 00:00", "2019-01-01 12:00"])) + + def test_get_area_bounds_empty(self): + c = ArrayCoordinates1d([]) + area_bounds = c.get_area_bounds(1.0) + assert np.all(np.isnan(area_bounds)) + + @pytest.mark.xfail(reason="spec uncertain") + def test_get_area_bounds_overlapping(self): + values = np.array([0.0, 1.0, 4.0, 6.0]) + c = ArrayCoordinates1d(values) - # inferred segment_lengths - c = ArrayCoordinates1d([1, 2, 4, 7], ctype="left") - c2 = c[1] - assert c2.segment_lengths == 2.0 or np.array_equal(c2.segment_lengths, [2.0]) + area_bounds = c.get_area_bounds([[-0.1, 0.1], [-10.0, 10.0], [-0.1, 0.1], [-0.1, 0.1]]) + assert_equal(area_bounds, [-11.0, 11.0]) class TestArrayCoordinatesSelection(object): @@ -792,7 +561,7 @@ def test_select_empty_shortcut(self): assert_equal(c.coordinates[I], []) def test_select_all_shortcut(self): - c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0], ctype="point") + c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0]) bounds = [0, 100] s = c.select(bounds) @@ -803,7 +572,7 @@ def test_select_all_shortcut(self): assert_equal(c.coordinates[I], c.coordinates) def test_select_none_shortcut(self): - c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0], ctype="point") + c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0]) # above s = c.select([100, 200]) @@ -822,7 +591,7 @@ def test_select_none_shortcut(self): assert_equal(c.coordinates[I], []) def test_select(self): - c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0], ctype="point") + c = ArrayCoordinates1d([20.0, 50.0, 60.0, 90.0, 40.0, 10.0]) # inner s = c.select([30.0, 55.0]) diff --git a/podpac/core/coordinates/test/test_base_coordinates.py b/podpac/core/coordinates/test/test_base_coordinates.py index 6662135a2..6ff31c377 100644 --- a/podpac/core/coordinates/test/test_base_coordinates.py +++ b/podpac/core/coordinates/test/test_base_coordinates.py @@ -23,7 +23,7 @@ def test_common_api(self): except NotImplementedError: pass - for method_name in ["_set_name", "_set_ctype"]: + for method_name in ["_set_name"]: try: method = getattr(c, method_name) method(None) @@ -40,6 +40,11 @@ def test_common_api(self): except NotImplementedError: pass + try: + c.get_area_bounds(None) + except NotImplementedError: + pass + try: c.select([0, 1]) 
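The new TestArrayCoordinatesAreaBounds tests above replace the removed area_bounds/ctype/segment_lengths checks with an explicit boundary argument to get_area_bounds. A minimal sketch of that call pattern, reusing values from the tests (the import path follows the test modules in this diff; exact results are whatever the podpac version under test implements):

    from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d

    c = ArrayCoordinates1d([0.0, 1.0, 4.0, 6.0])
    c.get_area_bounds(None)         # point coordinates: [0.0, 6.0]
    c.get_area_bounds(0.5)          # uniform boundary:  [-0.5, 6.5]
    c.get_area_bounds([-0.2, 0.7])  # segment boundary:  [-0.2, 6.7]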
except NotImplementedError: diff --git a/podpac/core/coordinates/test/test_coordinates.py b/podpac/core/coordinates/test/test_coordinates.py index 673f91059..aa10ea79b 100644 --- a/podpac/core/coordinates/test/test_coordinates.py +++ b/podpac/core/coordinates/test/test_coordinates.py @@ -14,6 +14,7 @@ from podpac.core.coordinates.stacked_coordinates import StackedCoordinates from podpac.core.coordinates.dependent_coordinates import DependentCoordinates from podpac.core.coordinates.rotated_coordinates import RotatedCoordinates +from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d from podpac.core.coordinates.cfunctions import crange, clinspace from podpac.core.coordinates.coordinates import Coordinates from podpac.core.coordinates.coordinates import concat, union, merge_dims @@ -445,23 +446,6 @@ def test_crs_with_vertical_units(self): with pytest.raises(ValueError): Coordinates([alt], crs="EPSG:2193") - def test_ctype(self): - # assign - lat = ArrayCoordinates1d([0, 1, 2]) - lon = ArrayCoordinates1d([0, 1, 2]) - - c = Coordinates([lat, lon], dims=["lat", "lon"], ctype="left") - assert c["lat"].ctype == "left" - assert c["lon"].ctype == "left" - - # don't overwrite - lat = ArrayCoordinates1d([0, 1, 2], ctype="right") - lon = ArrayCoordinates1d([0, 1, 2]) - - c = Coordinates([lat, lon], dims=["lat", "lon"], ctype="left") - assert c["lat"].ctype == "right" - assert c["lon"].ctype == "left" - class TestCoordinatesSerialization(object): def test_definition(self): @@ -546,8 +530,7 @@ def test_json(self): def test_from_url(self): crds = Coordinates([[41, 40], [-71, -70], "2018-05-19"], dims=["lat", "lon", "time"]) - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - crds2 = crds.transform("EPSG:3857") + crds2 = crds.transform("EPSG:3857") url = ( r"http://testwms/?map=map&&service=WMS&request=GetMap&layers=layer&styles=&format=image%2Fpng" @@ -555,37 +538,37 @@ def test_from_url(self): r"&bbox={},{},{},{}&time={}" ) - version = "1.1.1" + # version 1.1.1 for cc, epsg in zip([crds, crds2], ["3857", "4326"]): c = Coordinates.from_url( url.format( - crds2.bounds["lon"].min(), - crds2.bounds["lat"].min(), - crds2.bounds["lon"].max(), - crds2.bounds["lat"].max(), + min(crds2.bounds["lon"]), + min(crds2.bounds["lat"]), + max(crds2.bounds["lon"]), + max(crds2.bounds["lat"]), crds2.bounds["time"][0], - version=version, + version="1.1.1", epsg=epsg, ) ) - for d in crds.dims: - assert np.allclose(c.bounds[d].astype(float), crds2.bounds[d].astype(float)) - version = "1.3" + assert c.bounds == crds2.bounds + + # version 1.3 for cc, epsg in zip([crds, crds2], ["3857", "4326"]): c = Coordinates.from_url( url.format( - crds2.bounds["lat"].min(), - crds2.bounds["lon"].min(), - crds2.bounds["lat"].max(), - crds2.bounds["lon"].max(), + min(crds2.bounds["lat"]), + min(crds2.bounds["lon"]), + max(crds2.bounds["lat"]), + max(crds2.bounds["lon"]), crds2.bounds["time"][0], - version=version, + version="1.3", epsg=epsg, ) ) - for d in crds.dims: - assert np.allclose(c.bounds[d].astype(float), crds2.bounds[d].astype(float)) + + assert c.bounds == crds2.bounds class TestCoordinatesProperties(object): @@ -658,19 +641,6 @@ def test_bounds(self): assert_equal(bounds["lon"], c["lon"].bounds) assert_equal(bounds["time"], c["time"].bounds) - def test_area_bounds(self): - lat = [0, 1, 2] - lon = [10, 20, 30] - dates = ["2018-01-01", "2018-01-02"] - - c = Coordinates([[lat, lon], dates], dims=["lat_lon", "time"]) - area_bounds = c.area_bounds - 
assert isinstance(area_bounds, dict) - assert set(area_bounds.keys()) == set(c.udims) - assert_equal(area_bounds["lat"], c["lat"].area_bounds) - assert_equal(area_bounds["lon"], c["lon"].area_bounds) - assert_equal(area_bounds["time"], c["time"].area_bounds) - class TestCoordinatesDict(object): coords = Coordinates([[[0, 1, 2], [10, 20, 30]], ["2018-01-01", "2018-01-02"]], dims=["lat_lon", "time"]) @@ -1190,65 +1160,6 @@ def test_transpose_stacked(self): c.transpose("time", "lon_lat", in_place=True) assert c.dims == ("time", "lon_lat") - def test_transform(self): - c = Coordinates([[0, 1], [10, 20, 30, 40], ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"]) - - # transform - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - t = c.transform("EPSG:2193") - assert c.crs == "EPSG:4326" - assert t.crs == "EPSG:2193" - assert round(t["lat"].coordinates[0, 0]) == 29995930.0 - - # no transform needed - t = c.transform("EPSG:4326") - assert c.crs == "EPSG:4326" - assert t.crs == "EPSG:4326" - assert t is not c - assert t == c - - # support proj4 strings - proj = "+proj=merc +lat_ts=56.5 +ellps=GRS80" - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - t = c.transform(proj) - assert c.crs == "EPSG:4326" - assert t.crs == proj - assert round(t["lat"].coordinates[0, 0]) == 0.0 - - # no parameter - with pytest.raises(TypeError, match="transform requires crs argument"): - c.transform() - - def test_transform_stacked(self): - c = Coordinates([[[0, 1], [10, 20]], ["2018-01-01", "2018-01-02", "2018-01-03"]], dims=["lat_lon", "time"]) - - proj = "+proj=merc +lat_ts=56.5 +ellps=GRS80" - t = c.transform(proj) - assert c.crs == "EPSG:4326" - assert t.crs == proj - assert round(t["lat"].coordinates[0]) == 0.0 - - def test_transform_alt(self): - c = Coordinates( - [[0, 1], [10, 20, 30, 40], ["2018-01-01", "2018-01-02"], [100, 200, 300]], - dims=["lat", "lon", "time", "alt"], - crs="+proj=merc +vunits=us-ft", - ) - - proj = "+proj=merc +vunits=m" - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - t = c.transform(proj) - assert c.crs == "+proj=merc +vunits=us-ft" - assert t.crs == "+proj=merc +vunits=m" - np.testing.assert_array_almost_equal(t["lat"].coordinates[0], [0.0, 0.0, 0.0, 0.0]) - np.testing.assert_array_almost_equal(t["lat"].coordinates[1], [1.0, 1.0, 1.0, 1.0]) - np.testing.assert_array_almost_equal(t["lon"].coordinates[:, 0], [10.0, 10.0]) - np.testing.assert_array_almost_equal(t["lon"].coordinates[:, 1], [20.0, 20.0]) - np.testing.assert_array_almost_equal(t["lon"].coordinates[:, 2], [30.0, 30.0]) - np.testing.assert_array_almost_equal(t["lon"].coordinates[:, 3], [40.0, 40.0]) - assert t["time"] == c["time"] - np.testing.assert_array_almost_equal(t["alt"].coordinates, 0.30480061 * c["alt"].coordinates) - def test_select_single(self): lat = ArrayCoordinates1d([0, 1, 2, 3], name="lat") lon = ArrayCoordinates1d([10, 20, 30, 40], name="lon") @@ -1420,7 +1331,7 @@ def test_intersect_dims(self): assert c2["lon"] == c["lon"][2:5] def test_intersect_crs(self): - # should change the other coordinates crs into the native coordinates crs for intersect + # should change the other coordinates crs into the coordinates crs for intersect c = Coordinates( [np.linspace(0, 10, 11), np.linspace(0, 10, 11), ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"] ) @@ -1430,8 +1341,7 @@ def test_intersect_crs(self): crs="EPSG:2193", ) - 
with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - c_int = c.intersect(o) + c_int = c.intersect(o) assert c_int.crs == c.crs assert o.crs == "EPSG:2193" # didn't get changed assert np.all(c_int["lat"].bounds == np.array([5.0, 10.0])) @@ -1454,7 +1364,7 @@ class TestCoordinatesSpecial(object): def test_repr(self): repr(Coordinates([[0, 1], [10, 20], ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"])) repr(Coordinates([[[0, 1], [10, 20]], ["2018-01-01", "2018-01-02"]], dims=["lat_lon", "time"])) - repr(Coordinates([0, 10, []], dims=["lat", "lon", "time"], ctype="point")) + repr(Coordinates([0, 10, []], dims=["lat", "lon", "time"])) repr(Coordinates([crange(0, 10, 0.5)], dims=["alt"], crs="+proj=merc +vunits=us-ft")) repr(Coordinates([])) # TODO dependent coordinates @@ -1495,24 +1405,6 @@ def test_eq_ne_hash(self): assert c1.hash != c4.hash assert c1.hash != c5.hash - def test_eq_ne_hash_ctype(self): - lat = [0, 1, 2] - lon = [10, 20, 30] - c1 = Coordinates([lat, lon], dims=["lat", "lon"]) - c2 = Coordinates([lat, lon], dims=["lat", "lon"], ctype="point") - - # eq - assert not c1 == c2 - assert c2 == deepcopy(c2) - - # ne (this only matters in python 2) - assert c1 != c2 - assert not c2 != deepcopy(c2) - - # hash - assert c1.hash != c2.hash - assert c2.hash == deepcopy(c2).hash - def test_eq_ne_hash_crs(self): lat = [0, 1, 2] lon = [10, 20, 30] @@ -1626,20 +1518,20 @@ def uniform_working(self): c = Coordinates([clinspace(1.5, 0.5, 5, "lat"), clinspace(1, 2, 9, "lon")]) tf = np.array(c.geotransform).reshape(2, 3) np.testing.assert_almost_equal( - tf, np.array([[c["lon"].area_bounds[0], c["lon"].step, 0], [c["lat"].area_bounds[1], 0, c["lat"].step]]) + tf, np.array([[c["lon"].bounds[0], c["lon"].step, 0], [c["lat"].bounds[1], 0, c["lat"].step]]) ) # order: lon, lat c = Coordinates([clinspace(0.5, 1.5, 5, "lon"), clinspace(1, 2, 9, "lat")]) tf = np.array(c.geotransform).reshape(2, 3) np.testing.assert_almost_equal( - tf, np.array([[c["lon"].area_bounds[0], 0, c["lon"].step], [c["lat"].area_bounds[0], c["lat"].step, 0]]) + tf, np.array([[c["lon"].bounds[0], 0, c["lon"].step], [c["lat"].bounds[0], c["lat"].step, 0]]) ) # order: lon, -lat, time c = Coordinates([clinspace(0.5, 1.5, 5, "lon"), clinspace(2, 1, 9, "lat"), crange(10, 11, 2, "time")]) tf = np.array(c.geotransform).reshape(2, 3) np.testing.assert_almost_equal( - tf, np.array([[c["lon"].area_bounds[0], 0, c["lon"].step], [c["lat"].area_bounds[1], c["lat"].step, 0]]) + tf, np.array([[c["lon"].bounds[0], 0, c["lon"].step], [c["lat"].bounds[1], c["lat"].step, 0]]) ) # order: -lon, -lat, time, alt c = Coordinates( @@ -1652,7 +1544,7 @@ def uniform_working(self): ) tf = np.array(c.geotransform).reshape(2, 3) np.testing.assert_almost_equal( - tf, np.array([[c["lon"].area_bounds[1], 0, c["lon"].step], [c["lat"].area_bounds[1], c["lat"].step, 0]]) + tf, np.array([[c["lon"].bounds[1], 0, c["lon"].step], [c["lat"].bounds[1], c["lat"].step, 0]]) ) def error_time_alt_too_big(self): @@ -1730,3 +1622,170 @@ def rot_coords_working(self): ] ), ) + + +class TestCoordinatesMethodTransform(object): + def test_transform(self): + c = Coordinates([[0, 1], [10, 20, 30, 40], ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"]) + + # transform + t = c.transform("EPSG:2193") + assert c.crs == "EPSG:4326" + assert t.crs == "EPSG:2193" + assert round(t["lat"].coordinates[0, 0]) == 29995930.0 + + # no transform needed + t = c.transform("EPSG:4326") + assert c.crs == "EPSG:4326" + assert t.crs == 
"EPSG:4326" + assert t is not c + assert t == c + + # support proj4 strings + proj = "+proj=merc +lat_ts=56.5 +ellps=GRS80" + t = c.transform(proj) + assert c.crs == "EPSG:4326" + assert t.crs == proj + assert round(t["lat"].coordinates[0]) == 0.0 + + def test_transform_stacked(self): + c = Coordinates([[[0, 1], [10, 20]], ["2018-01-01", "2018-01-02", "2018-01-03"]], dims=["lat_lon", "time"]) + + proj = "+proj=merc +lat_ts=56.5 +ellps=GRS80" + t = c.transform(proj) + assert c.crs == "EPSG:4326" + assert t.crs == proj + assert round(t["lat"].coordinates[0]) == 0.0 + + def test_transform_alt(self): + c = Coordinates( + [[0, 1], [10, 20, 30, 40], ["2018-01-01", "2018-01-02"], [100, 200, 300]], + dims=["lat", "lon", "time", "alt"], + crs="+proj=merc +vunits=us-ft", + ) + + proj = "+proj=merc +vunits=m" + t = c.transform(proj) + assert c.crs == "+proj=merc +vunits=us-ft" + assert t.crs == "+proj=merc +vunits=m" + np.testing.assert_array_almost_equal(t["lat"].coordinates, c["lat"].coordinates) + np.testing.assert_array_almost_equal(t["lon"].coordinates, c["lon"].coordinates) + assert t["time"] == c["time"] + np.testing.assert_array_almost_equal(t["alt"].coordinates, 0.30480061 * c["alt"].coordinates) + + def test_transform_uniform_to_uniform(self): + c = Coordinates([clinspace(-90, 90, 5, "lat"), clinspace(-180, 180, 11, "lon"), clinspace(0, 1, 5, "time")]) + t = c.transform("EPSG:4269") # NAD 1983 uses same ellipsoid + + assert isinstance(t["lat"], UniformCoordinates1d) + assert isinstance(t["lon"], UniformCoordinates1d) + assert t.crs == "EPSG:4269" + assert t.dims == c.dims + + # Same thing, change the order of the inputs + c = Coordinates( + [clinspace(90, -90, 5, "lat"), clinspace(180, -180, 11, "lon"), clinspace(0, 1, 5, "time")][::-1] + ) + t = c.transform("EPSG:4269") # NAD 1983 uses same ellipsoid + + assert isinstance(t["lat"], UniformCoordinates1d) + assert isinstance(t["lon"], UniformCoordinates1d) + assert t.crs == "EPSG:4269" + + assert t.dims == c.dims + for d in ["lat", "lon"]: + for a in ["start", "stop", "step"]: + np.testing.assert_almost_equal(getattr(c[d], a), getattr(t[d], a)) + + def test_transform_uniform_stacked(self): + # TODO: Fix this test + c = Coordinates( + [[clinspace(-90, 90, 11, "lat"), clinspace(-180, 180, 11, "lon")], clinspace(0, 1, 5, "time")], + [["lat", "lon"], "time"], + ) + t = c.transform("EPSG:4269") # NAD 1983 uses same ellipsoid + + assert isinstance(t["lat"], UniformCoordinates1d) + assert isinstance(t["lon"], UniformCoordinates1d) + np.testing.assert_array_almost_equal(t["lat"].coordinates, c["lat"].coordinates) + np.testing.assert_array_almost_equal(t["lon"].coordinates, c["lon"].coordinates) + + def test_transform_uniform_to_array(self): + c = Coordinates([clinspace(-45, 45, 5, "lat"), clinspace(-180, 180, 11, "lon")]) + + # Ok for array coordinates + t = c.transform("EPSG:3395") + + assert isinstance(t["lat"], ArrayCoordinates1d) + assert isinstance(t["lon"], UniformCoordinates1d) + + t2 = t.transform(c.crs) + + for d in ["lon", "lat"]: + for a in ["start", "stop", "step"]: + np.testing.assert_almost_equal(getattr(c[d], a), getattr(t2[d], a)) + + def test_transform_uniform_to_dependent_to_uniform(self): + c = Coordinates([clinspace(50, 45, 7, "lat"), clinspace(70, 75, 11, "lon")]) + + # Ok for array coordinates + t = c.transform("EPSG:32629") + + assert "lat,lon" in t.dims + t2 = t.transform(c.crs) + for d in ["lat", "lon"]: + for a in ["start", "stop", "step"]: + np.testing.assert_almost_equal(getattr(c[d], a), getattr(t2[d], a)) + + def 
test_transform_dependent_stacked_to_dependent_stacked(self): + c = Coordinates([[np.array([[1, 2, 3], [4, 5, 6]]), np.array([[7, 8, 9], [10, 11, 12]])]], ["lat,lon"]) + c2 = Coordinates([[np.array([1, 2, 3, 4, 5, 6]), np.array([7, 8, 9, 10, 11, 12])]], ["lat_lon"]) + + # Ok for array coordinates + t = c.transform("EPSG:32629") + assert "lat,lon" in t.dims + t_s = c2.transform("EPSG:32629") + assert "lat_lon" in t_s.dims + + for d in ["lat", "lon"]: + np.testing.assert_almost_equal(t[d].coordinates.ravel(), t_s[d].coordinates.ravel()) + + t2 = t.transform(c.crs) + t2_s = t_s.transform(c.crs) + + for d in ["lat", "lon"]: + np.testing.assert_almost_equal(t2[d].coordinates, c[d].coordinates) + np.testing.assert_almost_equal(t2_s[d].coordinates, c2[d].coordinates) + + # Reverse order + c = Coordinates([[np.array([[1, 2, 3], [4, 5, 6]]), np.array([[7, 8, 9], [10, 11, 12]])]], ["lon,lat"]) + c2 = Coordinates([[np.array([1, 2, 3, 4, 5, 6]), np.array([7, 8, 9, 10, 11, 12])]], ["lon_lat"]) + + # Ok for array coordinates + t = c.transform("EPSG:32629") + assert "lon,lat" in t.dims + t_s = c2.transform("EPSG:32629") + assert "lon_lat" in t_s.dims + + for d in ["lat", "lon"]: + np.testing.assert_almost_equal(t[d].coordinates.ravel(), t_s[d].coordinates.ravel()) + + t2 = t.transform(c.crs) + t2_s = t_s.transform(c.crs) + + for d in ["lat", "lon"]: + np.testing.assert_almost_equal(t2[d].coordinates, c[d].coordinates) + np.testing.assert_almost_equal(t2_s[d].coordinates, c2[d].coordinates) + + def test_transform_missing_lat_lon(self): + with pytest.raises(ValueError, match="Cannot transform lat coordinates without lon coordinates"): + grid_coords = Coordinates([np.linspace(-10, 10, 21)], dims=["lat"]) + grid_coords.transform(crs="EPSG:2193") + + with pytest.raises(ValueError, match="Cannot transform lon coordinates without lat coordinates"): + stack_coords = Coordinates([(np.linspace(-10, 10, 21), np.linspace(-30, -10, 21))], dims=["lon_time"]) + stack_coords.transform(crs="EPSG:2193") + + with pytest.raises(ValueError, match="nonadjacent lat and lon"): + grid_coords = Coordinates([np.linspace(-10, 10, 21), [1], [1, 2, 3]], dims=["lat", "time", "lon"]) + grid_coords.transform(crs="EPSG:2193") diff --git a/podpac/core/coordinates/test/test_coordinates1d.py b/podpac/core/coordinates/test/test_coordinates1d.py index 061c284f2..092cb8028 100644 --- a/podpac/core/coordinates/test/test_coordinates1d.py +++ b/podpac/core/coordinates/test/test_coordinates1d.py @@ -1,18 +1,24 @@ -# see test_array_coordinates1d.py +import pytest + from podpac.core.coordinates.coordinates1d import Coordinates1d class TestCoordinates1d(object): + """ + See test_array_coordinates1d.py for additional Coordinates1d coverage + """ + def test_common_api(self): c = Coordinates1d(name="lat") attrs = [ "name", - "ctype", - "segment_lengths", "is_monotonic", "is_descending", "is_uniform", + "start", + "stop", + "step", "dims", "idims", "udims", @@ -21,9 +27,7 @@ def test_common_api(self): "dtype", "deltatype", "bounds", - "area_bounds", "coords", - "properties", "definition", "full_definition", ] diff --git a/podpac/core/coordinates/test/test_dependent_coordinates.py b/podpac/core/coordinates/test/test_dependent_coordinates.py index ba759baac..5a0654ce1 100644 --- a/podpac/core/coordinates/test/test_dependent_coordinates.py +++ b/podpac/core/coordinates/test/test_dependent_coordinates.py @@ -73,76 +73,6 @@ def test_set_name(self): with pytest.raises(ValueError, match="Dimension mismatch"): c._set_name("lon,lat") - def 
test_ctype_and_segment_lengths(self): - # explicit - c = DependentCoordinates((LAT, LON), ctypes=["left", "right"], segment_lengths=[1.0, 2.0]) - assert c.ctypes == ("left", "right") - assert c.segment_lengths == (1.0, 2.0) - - c = DependentCoordinates((LAT, LON), ctypes=["point", "point"]) - assert c.ctypes == ("point", "point") - assert c.segment_lengths == (None, None) - - c = DependentCoordinates((LAT, LON), ctypes=["midpoint", "point"], segment_lengths=[1.0, None]) - assert c.ctypes == ("midpoint", "point") - assert c.segment_lengths == (1.0, None) - - # single value - c = DependentCoordinates((LAT, LON), ctypes="left", segment_lengths=1.0) - assert c.ctypes == ("left", "left") - assert c.segment_lengths == (1.0, 1.0) - - c = DependentCoordinates((LAT, LON), ctypes="point") - assert c.ctypes == ("point", "point") - assert c.segment_lengths == (None, None) - - # defaults - c = DependentCoordinates((LAT, LON)) - assert c.ctypes == ("point", "point") - - # don't overwrite - c = DependentCoordinates((LAT, LON), ctypes="left", segment_lengths=1.0) - c._set_ctype("right") - assert c.ctypes == ("left", "left") - - # size mismatch - with pytest.raises(ValueError, match="size mismatch"): - DependentCoordinates((LAT, LON), ctypes=["left", "left", "left"], segment_lengths=1.0) - - with pytest.raises(ValueError, match="segment_lengths and coordinates size mismatch"): - DependentCoordinates((LAT, LON), ctypes="left", segment_lengths=[1.0, 1.0, 1.0]) - - # segment lengths required - with pytest.raises(TypeError, match="segment_lengths cannot be None"): - DependentCoordinates((LAT, LON), ctypes="left") - - with pytest.raises(TypeError, match="segment_lengths cannot be None"): - DependentCoordinates((LAT, LON), ctypes=["left", "point"]) - - with pytest.raises(TypeError, match="segment_lengths cannot be None"): - DependentCoordinates((LAT, LON), ctypes=["left", "point"], segment_lengths=[None, None]) - - # segment lengths prohibited - with pytest.raises(TypeError, match="segment_lengths must be None"): - DependentCoordinates((LAT, LON), segment_lengths=1.0) - - with pytest.raises(TypeError, match="segment_lengths must be None"): - DependentCoordinates((LAT, LON), ctypes="point", segment_lengths=1.0) - - with pytest.raises(TypeError, match="segment_lengths must be None"): - DependentCoordinates((LAT, LON), ctypes=["left", "point"], segment_lengths=[1.0, 1.0]) - - # invalid - with pytest.raises(tl.TraitError): - DependentCoordinates((LAT, LON), ctypes="abc") - - # invalid segment_lengths - with pytest.raises(ValueError): - DependentCoordinates((LAT, LON), ctypes="left", segment_lengths="abc") - - with pytest.raises(ValueError, match="segment_lengths must be positive"): - DependentCoordinates((LAT, LON), ctypes=["left", "right"], segment_lengths=[1.0, -2.0]) - def test_copy(self): c = DependentCoordinates((LAT, LON)) @@ -196,7 +126,7 @@ def test_full_definition(self): d = c.full_definition assert isinstance(d, dict) - assert set(d.keys()) == {"dims", "values", "ctypes", "segment_lengths"} + assert set(d.keys()) == {"dims", "values"} json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable @@ -236,18 +166,6 @@ def test_bounds(self): with pytest.raises(ValueError, match="Cannot get bounds"): c.bounds - def test_area_bounds(self): - c = DependentCoordinates([LAT, LON], dims=["lat", "lon"]) - area_bounds = c.area_bounds - assert isinstance(area_bounds, dict) - assert set(area_bounds.keys()) == set(c.udims) - assert_equal(area_bounds["lat"], c["lat"].area_bounds) - assert_equal(area_bounds["lon"], 
c["lon"].area_bounds) - - c = DependentCoordinates([LAT, LON]) - with pytest.raises(ValueError, match="Cannot get area_bounds"): - c.area_bounds - class TestDependentCoordinatesIndexing(object): def test_get_dim(self): @@ -266,21 +184,17 @@ def test_get_dim(self): c["other"] def test_get_dim_with_properties(self): - c = DependentCoordinates([LAT, LON], dims=["lat", "lon"], ctypes=["left", "right"], segment_lengths=[1.0, 2.0]) + c = DependentCoordinates([LAT, LON], dims=["lat", "lon"]) lat = c["lat"] assert isinstance(lat, ArrayCoordinatesNd) assert lat.name == c.dims[0] - assert lat.ctype == c.ctypes[0] - assert lat.segment_lengths == c.segment_lengths[0] assert lat.shape == c.shape repr(lat) lon = c["lon"] assert isinstance(lon, ArrayCoordinatesNd) assert lon.name == c.dims[1] - assert lon.ctype == c.ctypes[1] - assert lon.segment_lengths == c.segment_lengths[1] assert lon.shape == c.shape repr(lon) @@ -320,12 +234,10 @@ def test_get_index(self): assert_equal(c2._coords[1].coordinates, lon[B]) def test_get_index_with_properties(self): - c = DependentCoordinates([LAT, LON], dims=["lat", "lon"], ctypes=["left", "right"], segment_lengths=[1.0, 2.0]) + c = DependentCoordinates([LAT, LON], dims=["lat", "lon"]) c2 = c[[1, 2]] assert c2.dims == c.dims - assert c2.ctypes == c.ctypes - assert c2.segment_lengths == c.segment_lengths def test_iter(self): c = DependentCoordinates([LAT, LON], dims=["lat", "lon"]) diff --git a/podpac/core/coordinates/test/test_polar_coordinates.py b/podpac/core/coordinates/test/test_polar_coordinates.py index 898952a8c..7a8b806cc 100644 --- a/podpac/core/coordinates/test/test_polar_coordinates.py +++ b/podpac/core/coordinates/test/test_polar_coordinates.py @@ -87,15 +87,6 @@ def test_eq_theta(self): c2 = PolarCoordinates(center=[1.5, 2.0], radius=[1, 2, 4, 5], theta_size=7, dims=["lat", "lon"]) assert c1 != c2 - def test_eq_properties(self): - c1 = PolarCoordinates( - center=[1.5, 2.0], radius=[1, 2, 4, 5], theta_size=8, dims=["lat", "lon"], ctypes="left", segment_lengths=1 - ) - c2 = PolarCoordinates( - center=[1.5, 2.0], radius=[1, 2, 4, 5], theta_size=8, dims=["lat", "lon"], ctypes="right", segment_lengths=1 - ) - assert c1 != c2 - def test_eq(self): c1 = PolarCoordinates(center=[1.5, 2.0], radius=[1, 2, 4, 5], theta_size=8, dims=["lat", "lon"]) c2 = PolarCoordinates(center=[1.5, 2.0], radius=[1, 2, 4, 5], theta_size=8, dims=["lat", "lon"]) @@ -168,7 +159,7 @@ def test_full_definition(self): d = c.full_definition assert isinstance(d, dict) - assert set(d.keys()) == {"dims", "radius", "center", "theta", "ctypes", "segment_lengths"} + assert set(d.keys()) == {"dims", "radius", "center", "theta"} json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable diff --git a/podpac/core/coordinates/test/test_rotated_coordinates.py b/podpac/core/coordinates/test/test_rotated_coordinates.py index e933f8478..53bb76073 100644 --- a/podpac/core/coordinates/test/test_rotated_coordinates.py +++ b/podpac/core/coordinates/test/test_rotated_coordinates.py @@ -88,18 +88,6 @@ def test_thetas(self): c = RotatedCoordinates(shape=(3, 4), theta=-np.pi / 4, origin=[10, 20], step=[1.0, 2.0], dims=["lat", "lon"]) assert_allclose(c.corner, [15.656854, 22.828427]) - def test_ctypes(self): - c = RotatedCoordinates( - shape=(3, 4), - theta=np.pi / 4, - origin=[10, 20], - step=[1.0, 2.0], - dims=["lat", "lon"], - ctypes=["left", "right"], - segment_lengths=1.0, - ) - repr(c) - def test_invalid(self): with pytest.raises(ValueError, match="Invalid shape"): RotatedCoordinates(shape=(-3, 4), 
theta=np.pi / 4, origin=[10, 20], step=[1.0, 2.0], dims=["lat", "lon"]) @@ -213,7 +201,7 @@ def test_full_definition(self): d = c.full_definition assert isinstance(d, dict) - assert set(d.keys()) == {"dims", "shape", "theta", "origin", "step", "ctypes", "segment_lengths"} + assert set(d.keys()) == {"dims", "shape", "theta", "origin", "step"} json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable diff --git a/podpac/core/coordinates/test/test_stacked_coordinates.py b/podpac/core/coordinates/test/test_stacked_coordinates.py index cb82ab49a..ab94023bc 100644 --- a/podpac/core/coordinates/test/test_stacked_coordinates.py +++ b/podpac/core/coordinates/test/test_stacked_coordinates.py @@ -43,17 +43,6 @@ def test_init_Coordinates1d(self): repr(c) - def test_ctype(self): - lat = ArrayCoordinates1d([0, 1, 2], name="lat", ctype="left") - lon = ArrayCoordinates1d([10, 20, 30], name="lon") - c = StackedCoordinates([lat, lon], ctype="right") - - # lon ctype set by StackedCoordinates - assert c["lon"].ctype == "right" - - # but lat is left by StackedCoordinates because it was already explicitly set - assert c["lat"].ctype == "left" - def test_coercion_with_dims(self): c = StackedCoordinates([[0, 1, 2], [10, 20, 30]], dims=["lat", "lon"]) assert c.dims == ("lat", "lon") @@ -290,22 +279,6 @@ def test_bounds(self): with pytest.raises(ValueError, match="Cannot get bounds"): c.bounds - def test_area_bounds(self): - lat = [0, 1, 2] - lon = [10, 20, 30] - dates = ["2018-01-01", "2018-01-02"] - - c = StackedCoordinates([lat, lon], dims=["lat", "lon"]) - area_bounds = c.area_bounds - assert isinstance(area_bounds, dict) - assert set(area_bounds.keys()) == set(c.udims) - assert_equal(area_bounds["lat"], c["lat"].area_bounds) - assert_equal(area_bounds["lon"], c["lon"].area_bounds) - - c = StackedCoordinates([lat, lon]) - with pytest.raises(ValueError, match="Cannot get area_bounds"): - c.area_bounds - class TestStackedCoordinatesIndexing(object): def test_get_dim(self): diff --git a/podpac/core/coordinates/test/test_uniform_coordinates1d.py b/podpac/core/coordinates/test/test_uniform_coordinates1d.py index 3e4ef61b4..eee679738 100644 --- a/podpac/core/coordinates/test/test_uniform_coordinates1d.py +++ b/podpac/core/coordinates/test/test_uniform_coordinates1d.py @@ -7,6 +7,7 @@ from numpy.testing import assert_equal import podpac +from podpac.core.coordinates.utils import make_coord_array from podpac.core.coordinates.coordinates1d import Coordinates1d from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d @@ -394,12 +395,10 @@ def test_copy(self): assert c is not c2 assert c == c2 - c = UniformCoordinates1d(0, 10, 50, segment_lengths=0.5) - c2 = c.copy() - assert c is not c2 - assert c == c2 - def test_invalid_init(self): + with pytest.raises(ValueError): + UniformCoordinates1d(0, 0, 0) + with pytest.raises(ValueError): UniformCoordinates1d(0, 50, 0) @@ -457,26 +456,6 @@ def test_invalid_init(self): with pytest.raises(TypeError): UniformCoordinates1d("2018-01-10", "2018-01-01", size="1,D") - def test_segment_lengths(self): - c = UniformCoordinates1d(0, 50, 10, segment_lengths=5) - assert c.segment_lengths == 5 - - c = UniformCoordinates1d(0, 50, 10, segment_lengths=[5, 5, 5, 5, 5, 5]) - assert_equal(c.segment_lengths, np.array([5.0, 5.0, 5.0, 5.0, 5.0, 5.0])) - - def test_segment_lengths_inferred(self): - # numerical - c = UniformCoordinates1d(0, 50, 10) - assert c.segment_lengths == 10 - - # datetime - 
c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D") - assert c.segment_lengths == np.timedelta64(1, "D") - - # point coordinates - c = UniformCoordinates1d(0, 50, 10, ctype="point") - assert c.segment_lengths is None - class TestUniformCoordinatesEq(object): def test_equal(self): @@ -503,22 +482,14 @@ def test_equal_array_coordinates(self): assert c1 == c2 assert c1 != c3 - def test_equal_segment_lengths(object): - c1 = UniformCoordinates1d(0, 50, 10) - c2 = UniformCoordinates1d(0, 50, 10, segment_lengths=10) - c3 = UniformCoordinates1d(0, 50, 10, segment_lengths=5) - - assert c1 == c2 - assert c1 != c3 - class TestUniformCoordinatesSerialization(object): def test_definition(self): # numerical - c = UniformCoordinates1d(0, 50, 10, name="lat", ctype="point") + c = UniformCoordinates1d(0, 50, 10, name="lat") d = c.definition assert isinstance(d, dict) - assert set(d.keys()) == set(["start", "stop", "step", "name", "ctype"]) + assert set(d.keys()) == set(["start", "stop", "step", "name"]) json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable c2 = UniformCoordinates1d.from_definition(d) # test from_definition assert c2 == c @@ -532,15 +503,6 @@ def test_definition(self): c2 = UniformCoordinates1d.from_definition(d) # test from_definition assert c2 == c - def test_definition_segment_lengths(self): - c = UniformCoordinates1d(0, 50, 10, segment_lengths=0.5) - d = c.definition - assert isinstance(d, dict) - assert set(d.keys()) == set(["start", "stop", "step", "segment_lengths"]) - json.dumps(d, cls=podpac.core.utils.JSONEncoder) # test serializable - c2 = UniformCoordinates1d.from_definition(d) # test from_definition - assert c2 == c - def test_invalid_definition(self): # incorrect definition d = {"stop": 50} @@ -563,123 +525,13 @@ def test_from_definition_size(self): assert_equal(c.coordinates, np.array(["2018-01-01", "2018-01-02", "2018-01-03"]).astype(np.datetime64)) -class TestUniformCoordinatesProperties(object): - def test_area_bounds_point(self): - # numerical, ascending/descending and exact/inexact - c = UniformCoordinates1d(0, 50, 10, ctype="point") - assert_equal(c.area_bounds, [0, 50]) - c = UniformCoordinates1d(50, 0, -10, ctype="point") - assert_equal(c.area_bounds, [0, 50]) - c = UniformCoordinates1d(0, 49, 10, ctype="point") - assert_equal(c.area_bounds, [0, 40]) - c = UniformCoordinates1d(50, 9, -10, ctype="point") - assert_equal(c.area_bounds, [10, 50]) - - # datetime, ascending/descending and exact/inexact - c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D", ctype="point") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-04"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-04", "2018-01-01", "-1,D", ctype="point") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-04"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-06", "2,D", ctype="point") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-05"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-06", "2018-01-01", "-2,D", ctype="point") - assert_equal(c.area_bounds, np.array(["2018-01-02", "2018-01-06"]).astype(np.datetime64)) - - def test_area_bounds_left(self): - # numerical, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d(0, 50, 10, ctype="left") - assert_equal(c.area_bounds, [0, 60]) - c = UniformCoordinates1d(50, 0, -10, ctype="left") - assert_equal(c.area_bounds, [0, 60]) - c = UniformCoordinates1d(0, 49, 10, ctype="left") - assert_equal(c.area_bounds, [0, 50.0]) - c = 
UniformCoordinates1d(50, 9, -10, ctype="left") - assert_equal(c.area_bounds, [10, 60.0]) - c = UniformCoordinates1d(0, 0, 10, ctype="left") - assert_equal(c.area_bounds, [0, 10]) - c = UniformCoordinates1d(0, 0, -10, ctype="left") - assert_equal(c.area_bounds, [0, 10]) - - # datetime, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-05"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-04", "2018-01-01", "-1,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-05"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-06", "2,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-07"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-06", "2018-01-01", "-2,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-02", "2018-01-08"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "1,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-02"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "-1,D", ctype="left") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-02"]).astype(np.datetime64)) - - def test_area_bounds_right(self): - # numerical, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d(0, 50, 10, ctype="right") - assert_equal(c.area_bounds, [-10, 50]) - c = UniformCoordinates1d(50, 0, -10, ctype="right") - assert_equal(c.area_bounds, [-10, 50]) - c = UniformCoordinates1d(0, 49, 10, ctype="right") - assert_equal(c.area_bounds, [-10, 40]) - c = UniformCoordinates1d(50, 9, -10, ctype="right") - assert_equal(c.area_bounds, [0, 50]) - c = UniformCoordinates1d(0, 0, 10, ctype="right") - assert_equal(c.area_bounds, [-10, 0]) - c = UniformCoordinates1d(0, 0, -10, ctype="right") - assert_equal(c.area_bounds, [-10, 0]) - - # datetime, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-04"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-04", "2018-01-01", "-1,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-04"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-06", "2,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-30", "2018-01-05"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-06", "2018-01-01", "-2,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-06"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "1,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-01"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "-1,D", ctype="right") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-01"]).astype(np.datetime64)) - - def test_area_bounds_midpoint(self): - # numerical, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d(0, 50, 10, ctype="midpoint") - assert_equal(c.area_bounds, [-5, 55]) - c = UniformCoordinates1d(50, 0, -10, ctype="midpoint") - assert_equal(c.area_bounds, [-5, 55]) - c = UniformCoordinates1d(0, 49, 10, ctype="midpoint") - assert_equal(c.area_bounds, [-5, 45]) - c = UniformCoordinates1d(50, 9, -10, ctype="midpoint") - 
assert_equal(c.area_bounds, [5, 55]) - c = UniformCoordinates1d(0, 0, 10, ctype="midpoint") - assert_equal(c.area_bounds, [-5, 5]) - c = UniformCoordinates1d(0, 0, -10, ctype="midpoint") - assert_equal(c.area_bounds, [-5, 5]) - - # datetime, ascending/descending and exact/inexact/singleton - c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2017-12-31 12", "2018-01-04 12"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-04", "2018-01-01", "-1,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2017-12-31 12", "2018-01-04 12"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-06", "2,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2017-12-31", "2018-01-06"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-06", "2018-01-01", "-2,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2018-01-01", "2018-01-07"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "1,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2017-12-31 12", "2018-01-01 12"]).astype(np.datetime64)) - c = UniformCoordinates1d("2018-01-01", "2018-01-01", "-1,D", ctype="midpoint") - assert_equal(c.area_bounds, np.array(["2017-12-31 12", "2018-01-01 12"]).astype(np.datetime64)) - - class TestUniformCoordinatesIndexing(object): def test_len(self): c = UniformCoordinates1d(0, 50, 10) assert len(c) == 6 def test_index(self): - c = UniformCoordinates1d(0, 50, 10, name="lat", ctype="point") + c = UniformCoordinates1d(0, 50, 10, name="lat") # int c2 = c[2] @@ -795,7 +647,7 @@ def test_index(self): c[10] def test_index_descending(self): - c = UniformCoordinates1d(50, 0, -10, name="lat", ctype="point") + c = UniformCoordinates1d(50, 0, -10, name="lat") # int c2 = c[2] @@ -885,47 +737,47 @@ def test_index_descending(self): assert c2.properties == c.properties assert_equal(c2.coordinates, [50, 40, 30, 10]) - def test_index_segment_lengths(self): - # array of segment_lengths - c = UniformCoordinates1d(0, 50, 10, segment_lengths=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) - c2 = c[1] - assert c2.segment_lengths == 0.2 or np.array_equal(c2.segment_lengths, [0.2]) - - c2 = c[1:3] - assert_equal(c2.segment_lengths, [0.2, 0.3]) +class TestArrayCoordinatesAreaBounds(object): + def test_get_area_bounds_numerical(self): + c = UniformCoordinates1d(0, 50, 10) - c2 = c[[1, 3]] - assert_equal(c2.segment_lengths, [0.2, 0.4]) + # point + area_bounds = c.get_area_bounds(None) + assert_equal(area_bounds, [0.0, 50.0]) - c2 = c[[4, 1, 2]] - assert_equal(c2.segment_lengths, [0.5, 0.2, 0.3]) + # uniform + area_bounds = c.get_area_bounds(0.5) + assert_equal(area_bounds, [-0.5, 50.5]) - c2 = c[[True, True, True, False, True, False]] - assert_equal(c2.segment_lengths, [0.1, 0.2, 0.3, 0.5]) + # segment + area_bounds = c.get_area_bounds([-0.2, 0.7]) + assert_equal(area_bounds, [-0.2, 50.7]) - # uniform segment_lengths - c = UniformCoordinates1d(0, 50, 10, segment_lengths=0.5) + # polygon (i.e. 
there would be corresponding offets for another dimension) + area_bounds = c.get_area_bounds([-0.2, -0.5, 0.7, 0.5]) + assert_equal(area_bounds, [-0.5, 50.7]) - c2 = c[1] - assert c2.segment_lengths == 0.5 + def test_get_area_bounds_datetime(self): + c = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D") - c2 = c[1:3] - assert c2.segment_lengths == 0.5 + # point + area_bounds = c.get_area_bounds(None) + assert_equal(area_bounds, make_coord_array(["2018-01-01", "2018-01-04"])) - c2 = c[[1, 3]] - assert c2.segment_lengths == 0.5 + # uniform + area_bounds = c.get_area_bounds("1,D") + assert_equal(area_bounds, make_coord_array(["2017-12-31", "2018-01-05"])) - c2 = c[[4, 1, 2]] - assert c2.segment_lengths == 0.5 + area_bounds = c.get_area_bounds("1,M") + assert_equal(area_bounds, make_coord_array(["2017-12-01", "2018-02-04"])) - c2 = c[[True, True, True, False, True, False]] - assert c2.segment_lengths == 0.5 + area_bounds = c.get_area_bounds("1,Y") + assert_equal(area_bounds, make_coord_array(["2017-01-01", "2019-01-04"])) - # inferred segment_lengths - c = UniformCoordinates1d(0, 50, 10) - c2 = c[1] - assert c2.segment_lengths == 10 or np.array_equal(c2.segment_lengths, [10]) + # segment + area_bounds = c.get_area_bounds(["0,h", "12,h"]) + assert_equal(area_bounds, make_coord_array(["2018-01-01 00:00", "2018-01-04 12:00"])) class TestUniformCoordinatesSelection(object): diff --git a/podpac/core/coordinates/uniform_coordinates1d.py b/podpac/core/coordinates/uniform_coordinates1d.py index 46ee1a36e..8d0a4c34d 100644 --- a/podpac/core/coordinates/uniform_coordinates1d.py +++ b/podpac/core/coordinates/uniform_coordinates1d.py @@ -37,10 +37,6 @@ class UniformCoordinates1d(Coordinates1d): Dimension name, one of 'lat', 'lon', 'time', 'alt'. coordinates : array, read-only Full array of coordinate values. - ctype : str - Coordinates type, one of'point', 'left', 'right', or 'midpoint'. - segment_lengths : array, float, timedelta - When ctype is a segment type, the segment lengths for the coordinates. See Also -------- @@ -56,7 +52,7 @@ class UniformCoordinates1d(Coordinates1d): step = tl.Union([tl.Float(), tl.Instance(np.timedelta64)], read_only=True) step.__doc__ = ":float, timedelta64: Signed, non-zero step between coordinates." - def __init__(self, start, stop, step=None, size=None, name=None, ctype=None, segment_lengths=None): + def __init__(self, start, stop, step=None, size=None, name=None): """ Create uniformly-spaced 1d coordinates from a `start`, `stop`, and `step` or `size`. @@ -72,11 +68,6 @@ def __init__(self, start, stop, step=None, size=None, name=None, ctype=None, seg Number of coordinates (either step or size required). name : str, optional Dimension name, one of 'lat', 'lon', 'time', or 'alt'. - ctype : str, optional - Coordinates type: 'point', 'left', 'right', or 'midpoint'. - segment_lengths: array, float, timedelta, optional - When ctype is a segment type, the segment lengths for the coordinates. By defaul, the segment_lengths are - equal the step. 
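The same boundary-based pattern applies to UniformCoordinates1d now that ctype and segment_lengths are dropped from its constructor. A small sketch based on the uniform tests above (import path as in the test file; datetime deltas use podpac's "value,unit" strings):

    from podpac.core.coordinates.uniform_coordinates1d import UniformCoordinates1d

    c = UniformCoordinates1d(0, 50, 10, name="lat")
    c.get_area_bounds(None)   # [0.0, 50.0]
    c.get_area_bounds(0.5)    # [-0.5, 50.5]

    t = UniformCoordinates1d("2018-01-01", "2018-01-04", "1,D")
    t.get_area_bounds("1,D")  # [2017-12-31, 2018-01-05]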
""" if step is not None and size is not None: @@ -105,6 +96,9 @@ def __init__(self, start, stop, step=None, size=None, name=None, ctype=None, seg % (type(start), type(stop), type(step)) ) + if fstep == 0: + raise ValueError("Uniformcoordinates1d step cannot be zero") + if fstep <= 0 and start < stop: raise ValueError("UniformCoordinates1d step must be greater than zero if start < stop.") @@ -116,18 +110,7 @@ def __init__(self, start, stop, step=None, size=None, name=None, ctype=None, seg self.set_trait("step", step) # set common properties - super(UniformCoordinates1d, self).__init__(name=name, ctype=ctype, segment_lengths=segment_lengths) - - @tl.default("ctype") - def _default_ctype(self): - return "midpoint" - - @tl.default("segment_lengths") - def _default_segment_lengths(self): - if self.ctype == "point": - return None - - return np.abs(self.step) + super(UniformCoordinates1d, self).__init__(name=name) def __eq__(self, other): if not super(UniformCoordinates1d, self).__eq__(other): @@ -184,8 +167,7 @@ def from_definition(cls, d): "start": 1, "stop": 10, "step": 0.5, - "name": "lat", - "ctype": "points" + "name": "lat" }) Arguments @@ -221,60 +203,37 @@ def __len__(self): return self.size def __getitem__(self, index): - if isinstance(index, slice): - # start, stop, step - if index.start is None: - start = self.start - elif index.start >= 0: - start = add_coord(self.start, self.step * min(index.start, self.size - 1)) - else: - start = add_coord(self.start, self.step * max(0, self.size + index.start)) - - if index.stop is None: - stop = self.stop - elif index.stop >= 0: - stop = add_coord(self.start, self.step * (min(index.stop, self.size) - 1)) - else: - stop = add_coord(self.start, self.step * max(0, self.size + index.stop - 1)) - - if index.step is None: - step = self.step - else: - step = index.step * self.step - if index.step < 0: - start, stop = stop, start - - # properties and segment_lengths - kwargs = self.properties - - if self.ctype != "point": - if isinstance(self.segment_lengths, np.ndarray): - kwargs["segment_lengths"] = self.segment_lengths[index] - elif self.segment_lengths != step: - kwargs["segment_lengths"] = self.segment_lengths - - # reroute empty slices to the else clause - if start > stop and step > 0: - return self[[]] - - return UniformCoordinates1d(start, stop, step, **kwargs) - + # fallback for non-slices + if not isinstance(index, slice): + return ArrayCoordinates1d(self.coordinates[index], **self.properties) + + # start, stop, step + if index.start is None: + start = self.start + elif index.start >= 0: + start = add_coord(self.start, self.step * min(index.start, self.size - 1)) else: - # coordinates - coordinates = self.coordinates[index] + start = add_coord(self.start, self.step * max(0, self.size + index.start)) - # properties and segment_lengths - kwargs = self.properties + if index.stop is None: + stop = self.stop + elif index.stop >= 0: + stop = add_coord(self.start, self.step * (min(index.stop, self.size) - 1)) + else: + stop = add_coord(self.start, self.step * max(0, self.size + index.stop - 1)) - if self.ctype != "point": - if isinstance(self.segment_lengths, np.ndarray): - kwargs["segment_lengths"] = self.segment_lengths[index] - else: - kwargs["segment_lengths"] = self.segment_lengths + if index.step is None: + step = self.step + else: + step = index.step * self.step + if index.step < 0: + start, stop = stop, start - kwargs["ctype"] = self.ctype + # empty slice + if start > stop and step > 0: + return ArrayCoordinates1d([], **self.properties) - 
return ArrayCoordinates1d(coordinates, **kwargs) + return UniformCoordinates1d(start, stop, step, **self.properties) # ------------------------------------------------------------------------------------------------------------------ # Properties @@ -345,11 +304,7 @@ def bounds(self): hi = add_coord(self.start, self.step * (self.size - 1)) if self.is_descending: lo, hi = hi, lo - - # read-only array with the correct dtype - bounds = np.array([lo, hi], dtype=self.dtype) - bounds.setflags(write=False) - return bounds + return lo, hi @property def argbounds(self): @@ -383,9 +338,20 @@ def copy(self): kwargs = self.properties return UniformCoordinates1d(self.start, self.stop, self.step, **kwargs) + def simplify(self): + """ Get the simplified/optimized representation of these coordinates. + + Returns + ------- + simplified : UniformCoordinates1d + These coordinates (the coordinates are already simplified). + """ + + return self + def _select(self, bounds, return_indices, outer): # TODO is there an easier way to do this with the new outer flag? - my_bounds = self.bounds.copy() + my_bounds = self.bounds # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision if self.dtype == np.datetime64: diff --git a/podpac/core/coordinates/utils.py b/podpac/core/coordinates/utils.py index 0a005d628..fad098d6f 100644 --- a/podpac/core/coordinates/utils.py +++ b/podpac/core/coordinates/utils.py @@ -209,7 +209,7 @@ def make_coord_delta(val): try: val = val.item() except ValueError: - raise TypeError("Invalid coordinate delta, unsuported type '%s'" % type(val)) + raise TypeError("Invalid coordinate delta, unsupported type '%s'" % type(val)) # type checking and conversion if isinstance(val, string_types): @@ -221,7 +221,7 @@ def make_coord_delta(val): elif isinstance(val, numbers.Number): val = float(val) else: - raise TypeError("Invalid coordinate delta, unsuported type '%s'" % type(val)) + raise TypeError("Invalid coordinate delta, unsupported type '%s'" % type(val)) return val @@ -460,11 +460,6 @@ def __init__(self, *args, **kwargs): super(Dimension, self).__init__(VALID_DIMENSION_NAMES, *args, **kwargs) -class CoordinateType(tl.Enum): - def __init__(self, *args, **kwargs): - super(CoordinateType, self).__init__(["point", "left", "right", "midpoint"], *args, **kwargs) - - def lower_precision_time_bounds(my_bounds, other_bounds, outer): """ When given two bounds of np.datetime64, this function will convert both bounds to the lower-precision (in terms of @@ -473,7 +468,7 @@ def lower_precision_time_bounds(my_bounds, other_bounds, outer): Parameters ----------- my_bounds : List(np.datetime64) - The bounds of the native coordinates of the dataset + The bounds of the coordinates of the dataset other_bounds : List(np.datetime64) The bounds used for the selection outer : bool @@ -482,7 +477,7 @@ def lower_precision_time_bounds(my_bounds, other_bounds, outer): Returns -------- my_bounds : List(np.datetime64) - The bounds of the native coordinates of the dataset at the new precision + The bounds of the coordinates of the dataset at the new precision other_bounds : List(np.datetime64) The bounds used for the selection at the new precision, if outer == True, otherwise return original coordinates """ diff --git a/podpac/core/data/array_source.py b/podpac/core/data/array_source.py index fa5c14445..86df77d6e 100644 --- a/podpac/core/data/array_source.py +++ b/podpac/core/data/array_source.py @@ -4,6 +4,7 @@ from __future__ import division, unicode_literals, print_function, 
absolute_import +import warnings from collections import OrderedDict from six import string_types @@ -12,24 +13,25 @@ import pandas as pd # Core dependency of xarray from podpac.core.utils import common_doc, ArrayTrait -from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource from podpac.core.cache import CacheCtrl +from podpac.core.node import NoCacheMixin from podpac.core.coordinates import Coordinates +from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource -class Array(DataSource): +class Array(NoCacheMixin, DataSource): """Create a DataSource from an array -- this node is mostly meant for small experiments Attributes ---------- source : np.ndarray Numpy array containing the source data - native_coordinates : podpac.Coordinates + coordinates : podpac.Coordinates The coordinates of the source data Notes ------ - `native_coordinates` need to supplied by the user when instantiating this node. + `coordinates` needs to be supplied by the user when instantiating this node. This Node is not meant for large arrays, and cause issues with caching. As such, this Node override the default cache behavior as having no cache -- its data is in RAM already and caching is not helpful. @@ -43,41 +45,50 @@ class Array(DataSource): >>> coords = podpac.Coordinates([podpac.clinspace(1, 10, 10, 'time'), podpac.clinspace(1, 32, 32, 'lat'), podpac.clinspace(1, 34, 34, 'lon')]) - >>> node = podpac.data.Array(source=data, native_coordinates=coords, outputs=['R', 'G', 'B']) + >>> node = podpac.data.Array(source=data, coordinates=coords, outputs=['R', 'G', 'B']) >>> output = node.eval(coords) """ - source = ArrayTrait().tag(readonly=True) - native_coordinates = tl.Instance(Coordinates, allow_none=False).tag(attr=True) + source = ArrayTrait().tag(attr=True) + coordinates = tl.Instance(Coordinates).tag(attr=True) - @tl.default("cache_ctrl") - def _cache_ctrl_default(self): - return CacheCtrl([]) + _repr_keys = ["shape", "interpolation"] @tl.validate("source") def _validate_source(self, d): - a = d["value"] try: - a.astype(float) + d["value"].astype(float) except: - raise ValueError("Array source must be numerical") - return a + raise ValueError("Array 'source' data must be numerical") + return d["value"] def _first_init(self, **kwargs): - # If Array is being created from Node.from_definition or Node.from_json, then we have to handle the - # native coordinates specifically. This is special. No other DataSource node needs to deserialize - # native_coordinates in this way because it is implemented specifically in the node through get_coordinates - if isinstance(kwargs.get("native_coordinates"), OrderedDict): - kwargs["native_coordinates"] = Coordinates.from_definition(kwargs["native_coordinates"]) - elif isinstance(kwargs.get("native_coordinates"), string_types): - kwargs["native_coordinates"] = Coordinates.from_json(kwargs["native_coordinates"]) + # If the coordinates were supplied explicitly, they may need to be deserialized. 
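With the rename, the Array node is constructed with coordinates instead of native_coordinates and opts out of caching via NoCacheMixin. A short usage sketch adapted from the docstring example above (the data shape is assumed to match the coordinates plus the three output channels):

    import numpy as np
    import podpac

    data = np.random.rand(10, 32, 34, 3)
    coords = podpac.Coordinates(
        [podpac.clinspace(1, 10, 10, "time"), podpac.clinspace(1, 32, 32, "lat"), podpac.clinspace(1, 34, 34, "lon")]
    )
    node = podpac.data.Array(source=data, coordinates=coords, outputs=["R", "G", "B"])
    output = node.eval(coords)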
+ if isinstance(kwargs.get("coordinates"), OrderedDict): + kwargs["coordinates"] = Coordinates.from_definition(kwargs["coordinates"]) + elif isinstance(kwargs.get("coordinates"), string_types): + kwargs["coordinates"] = Coordinates.from_json(kwargs["coordinates"]) return kwargs + @property + def shape(self): + """Returns the shape of :attr:`self.source` + + Returns + ------- + tuple + Shape of :attr:`self.source` + """ + return self.source.shape + @common_doc(COMMON_DATA_DOC) def get_data(self, coordinates, coordinates_index): """{get_data} """ - s = coordinates_index - d = self.create_output_array(coordinates, data=self.source[s]) + d = self.create_output_array(coordinates, data=self.source[coordinates_index]) return d + + def set_coordinates(self, value): + """ Not needed. """ + pass diff --git a/podpac/core/data/csv_source.py b/podpac/core/data/csv_source.py new file mode 100644 index 000000000..2a6fb0f19 --- /dev/null +++ b/podpac/core/data/csv_source.py @@ -0,0 +1,152 @@ +import pandas as pd +import traitlets as tl + +from podpac.core.utils import common_doc, cached_property +from podpac.core.coordinates import Coordinates, StackedCoordinates +from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC +from podpac.core.data.file_source import BaseFileSource, FileKeysMixin, LoadFileMixin + + +@common_doc(COMMON_DATA_DOC) +class CSV(FileKeysMixin, LoadFileMixin, BaseFileSource): + """Create a DataSource from a .csv file. + + This class assumes that the data has a storage format such as: + header 1, header 2, header 3, ... + row1_data1, row1_data2, row1_data3, ... + row2_data1, row2_data2, row2_data3, ... + + Attributes + ---------- + source : str + Path to the csv file + header : int, None + Row number containing the column names, default 0. Use None for no header. 
+ dataset : pd.DataFrame + Raw Pandas DataFrame used to read the data + coordinates : :class:`podpac.Coordinates` + {coordinates} + data_key : str, int + data column number or column title, default 'data' + lat_key : str, int + latitude column number or column title, default 'lat' + lon_key : str, int + longitude column number or column title, default 'lon' + time_key : str, int + time column number or column title, default 'time' + alt_key : str, int + altitude column number or column title, default 'alt' + crs : str + Coordinate reference system of the coordinates + """ + + header = tl.Any(default_value=0).tag(attr=True) + lat_key = tl.Union([tl.Unicode(), tl.Int()], default_value="lat").tag(attr=True) + lon_key = tl.Union([tl.Unicode(), tl.Int()], default_value="lon").tag(attr=True) + time_key = tl.Union([tl.Unicode(), tl.Int()], default_value="time").tag(attr=True) + alt_key = tl.Union([tl.Unicode(), tl.Int()], default_value="alt").tag(attr=True) + data_key = tl.Union([tl.Unicode(), tl.Int(), tl.List(trait=tl.Unicode()), tl.List(trait=tl.Int())]).tag(attr=True) + + @tl.default("data_key") + def _default_data_key(self): + return super(CSV, self)._default_data_key() + + @tl.validate("data_key") + def _validate_data_key(self, d): + keys = d["value"] + if not isinstance(keys, list): + keys = [d["value"]] + + if isinstance(keys[0], int): + for col in keys: + if col not in self.available_data_cols: + raise ValueError("Invalid data_key %d, available columns are %s" % (col, self.available_data_cols)) + else: + for key in keys: + if key not in self.available_data_keys: + raise ValueError("Invalid data_key '%s', available keys are %s" % (key, self.available_data_keys)) + + return d["value"] + + @tl.default("outputs") + def _default_outputs(self): + if not isinstance(self.data_key, list): + return None + else: + return [self._get_key(elem) for elem in self.data_key] + + # ------------------------------------------------------------------------- + # public api methods + # ------------------------------------------------------------------------- + + def open_dataset(self, f): + return pd.read_csv(f, parse_dates=True, infer_datetime_format=True, header=self.header) + + @cached_property + def dims(self): + """ list of dataset coordinate dimensions """ + lookup = { + self._get_key(self.lat_key): "lat", + self._get_key(self.lon_key): "lon", + self._get_key(self.alt_key): "alt", + self._get_key(self.time_key): "time", + } + return [lookup[key] for key in self.dataset.columns if key in lookup] + + @cached_property + def keys(self): + """available data keys""" + return self.dataset.columns.tolist() + + @cached_property + def available_data_keys(self): + """available data keys""" + + dim_keys = [self._get_key(key) for key in [self.lat_key, self.lon_key, self.alt_key, self.time_key]] + keys = [key for key in self.keys if key not in dim_keys] + if len(keys) == 0: + raise ValueError("No data keys found in '%s'" % self.source) + return keys + + @cached_property + def available_data_cols(self): + return [self._get_col(key) for key in self.available_data_keys] + + @common_doc(COMMON_DATA_DOC) + def get_coordinates(self): + """{get_coordinates} + + Note: CSV files have StackedCoordinates. 
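A hypothetical usage sketch of the CSV node described above (the file name and column titles are invented; any of the *_key traits may also be given as integer column indices):

    import podpac

    # columns: lat, lon, time, soil_moisture
    node = podpac.data.CSV(
        source="site_measurements.csv",
        lat_key="lat",
        lon_key="lon",
        time_key="time",
        data_key="soil_moisture",  # or an integer column index
        crs="EPSG:4326",
    )

    print(node.dims)         # e.g. ['lat', 'lon', 'time']
    print(node.coordinates)  # rows collapse into a single stacked dimension
    output = node.eval(node.coordinates)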
+ """ + + coords = super(CSV, self).get_coordinates() + if len(coords) == 1: + return coords + stacked = StackedCoordinates(list(coords.values())) + return Coordinates([stacked], validate_crs=False, **coords.properties) + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + + if not isinstance(self.data_key, list): + I = self._get_col(self.data_key) + else: + I = [self._get_col(key) for key in self.data_key] + data = self.dataset.iloc[coordinates_index[0], I] + return self.create_output_array(coordinates, data=data) + + # ------------------------------------------------------------------------- + # helper methods + # ------------------------------------------------------------------------- + + def _lookup_key(self, dim): + lookup = {"lat": self.lat_key, "lon": self.lon_key, "alt": self.alt_key, "time": self.time_key} + return self._get_key(lookup[dim]) + + def _get_key(self, key): + return self.dataset.columns[key] if isinstance(key, int) else key + + def _get_col(self, key): + return key if isinstance(key, int) else self.dataset.columns.get_loc(key) diff --git a/podpac/core/data/dataset_source.py b/podpac/core/data/dataset_source.py new file mode 100644 index 000000000..e9020330c --- /dev/null +++ b/podpac/core/data/dataset_source.py @@ -0,0 +1,79 @@ +import xarray as xr +import traitlets as tl + +from podpac.core.utils import common_doc, cached_property +from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC +from podpac.core.data.file_source import BaseFileSource, FileKeysMixin, LoadFileMixin + + +@common_doc(COMMON_DATA_DOC) +class Dataset(FileKeysMixin, LoadFileMixin, BaseFileSource): + """Create a DataSource node using xarray.open_dataset. + + Attributes + ---------- + source : str + Path to the dataset file. + In addition to local paths, file://, http://, ftp://, and s3:// transport protocols are supported. + dataset : xarray.Dataset + Dataset object. + coordinates : :class:`podpac.Coordinates` + {coordinates} + data_key : str + data key, default 'data' + lat_key : str + latitude key, default 'lat' + lon_key : str + longitude key, default 'lon' + time_key : str + time key, default 'time' + alt_key : str + altitude key, default 'alt' + crs : str + Coordinate reference system of the coordinates + extra_dim : dict + In cases where the data contain dimensions other than ['lat', 'lon', 'time', 'alt'], these dimensions need to be selected. 
+ For example, if the data contains ['lat', 'lon', 'channel'], the second channel can be selected using `extra_dim=dict(channel=1)` + """ + + # dataset = tl.Instance(xr.Dataset).tag(readonly=True) + extra_dim = tl.Dict(allow_none=True).tag(attr=True) + + @tl.default("extra_dim") + def _default_extra_dim(self): + return None + + # ------------------------------------------------------------------------- + # public api properties and methods + # ------------------------------------------------------------------------- + + def open_dataset(self, fp): + return xr.open_dataset(fp) + + def close_dataset(self): + self.dataset.close() + + @cached_property + def dims(self): + """dataset coordinate dims""" + lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} + return [lookup[dim] for dim in self.dataset.dims] + + @cached_property + def keys(self): + return list(self.dataset.keys()) + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + + if not isinstance(self.data_key, list): + data = self.dataset[self.data_key] + data = data.transpose(*self.dataset.dims) + else: + data = self.dataset[self.data_key].to_array(dim="output") + tdims = tuple(self.dataset.dims) + ("output",) + data = data.transpose(*tdims) + + return self.create_output_array(coordinates, data.data[coordinates_index]) diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 0d4f91eef..5bae3a2ff 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -10,7 +10,6 @@ from copy import deepcopy import warnings import logging -from six import string_types import numpy as np import xarray as xr @@ -20,16 +19,17 @@ from podpac.core.settings import settings from podpac.core.units import UnitsDataArray from podpac.core.coordinates import Coordinates, Coordinates1d, StackedCoordinates +from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES, make_coord_delta, make_coord_delta_array from podpac.core.node import Node, NodeException -from podpac.core.utils import common_doc, trait_is_defined +from podpac.core.utils import common_doc from podpac.core.node import COMMON_NODE_DOC from podpac.core.node import node_eval -from podpac.core.data.interpolation import Interpolation, interpolation_trait +from podpac.core.interpolation.interpolation import Interpolation, InterpolationTrait log = logging.getLogger(__name__) DATA_DOC = { - "native_coordinates": "The coordinates of the data source.", + "coordinates": "The coordinates of the data source.", "get_data": """ This method must be defined by the data source implementing the DataSource class. When data source nodes are evaluated, this method is called with request coordinates and coordinate indexes. @@ -65,12 +65,12 @@ the data will be cast into UnitsDataArray using the returned data to fill values at the requested source coordinates. """, - "get_native_coordinates": """ - Returns a Coordinates object that describes the native coordinates of the data source. + "get_coordinates": """ + Returns a Coordinates object that describes the coordinates of the data source. In most cases, this method is defined by the data source implementing the DataSource class. - If method is not implemented by the data source, it will try to return ``self.native_coordinates`` - if ``self.native_coordinates`` is not None. + If method is not implemented by the data source, it will try to return ``self.coordinates`` + if ``self.coordinates`` is not None. 
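Returning to the xarray-backed Dataset node above, a hedged usage sketch (the file and variable names are hypothetical):

    import podpac

    # NetCDF file with a 'soil_moisture' variable over (time, lat, lon)
    node = podpac.data.Dataset(
        source="soil_moisture.nc",
        data_key="soil_moisture",
        lat_key="lat",
        lon_key="lon",
        time_key="time",
    )
    print(node.dims)  # ['time', 'lat', 'lon'], mapped through the *_key traits
    output = node.eval(node.coordinates)
    node.close_dataset()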
Otherwise, this method will raise a NotImplementedError. @@ -134,8 +134,8 @@ class DataSource(Node): source : Any The location of the source. Depending on the child node this can be a filepath, numpy array, or dictionary as a few examples. - native_coordinates : :class:`podpac.Coordinates` - {native_coordinates} + coordinates : :class:`podpac.Coordinates` + {coordinates} interpolation : str, dict, optional {interpolation_long} nan_vals : List, optional @@ -143,21 +143,29 @@ class DataSource(Node): coordinate_index_type : str, optional Type of index to use for data source. Possible values are ``['list', 'numpy', 'xarray', 'pandas']`` Default is 'numpy' - + cache_coordinates : bool + Whether to cache coordinates using the podpac ``cache_ctrl``. Default False. + cache_output : bool + Should the node's output be cached? If not provided or None, uses default based on + settings["CACHE_DATASOURCE_OUTPUT_DEFAULT"]. If True, outputs will be cached and retrieved from cache. If False, + outputs will not be cached OR retrieved from cache (even if they exist in cache). Notes ----- - Custom DataSource Nodes must implement the :meth:`get_data` and :meth:`get_native_coordinates` methods. + Custom DataSource Nodes must implement the :meth:`get_data` and :meth:`get_coordinates` methods. """ - source = tl.Any().tag(readonly=True) - native_coordinates = tl.Instance(Coordinates).tag(readonly=True) - interpolation = interpolation_trait() + interpolation = InterpolationTrait().tag(attr=True) + nan_vals = tl.List().tag(attr=True) + boundary = tl.Dict().tag(attr=True) + coordinate_index_type = tl.Enum(["slice", "list", "numpy"], default_value="numpy") # , "xarray", "pandas"], - nan_vals = tl.List(allow_none=True).tag(attr=True) + cache_coordinates = tl.Bool(False) + cache_output = tl.Bool() # privates _interpolation = tl.Instance(Interpolation) + _coordinates = tl.Instance(Coordinates, allow_none=True, default_value=None, read_only=True) _original_requested_coordinates = tl.Instance(Coordinates, allow_none=True) _requested_source_coordinates = tl.Instance(Coordinates) @@ -165,17 +173,44 @@ class DataSource(Node): _requested_source_data = tl.Instance(UnitsDataArray) _evaluated_coordinates = tl.Instance(Coordinates) - # when native_coordinates is not defined, default calls get_native_coordinates - @tl.default("native_coordinates") - def _default_native_coordinates(self): - return self.get_native_coordinates() - # this adds a more helpful error message if user happens to try an inspect _interpolation before evaluate @tl.default("_interpolation") def _default_interpolation(self): self._set_interpolation() return self._interpolation + @tl.validate("boundary") + def _validate_boundary(self, d): + val = d["value"] + for dim, boundary in val.items(): + if dim not in VALID_DIMENSION_NAMES: + raise ValueError("Invalid dimension '%s' in boundary" % dim) + if np.array(boundary).ndim == 0: + try: + delta = make_coord_delta(boundary) + except ValueError: + raise ValueError( + "Invalid boundary for dimension '%s' ('%s' is not a valid coordinate delta)" % (dim, boundary) + ) + + if np.array(delta).astype(float) < 0: + raise ValueError("Invalid boundary for dimension '%s' (%s < 0)" % (dim, delta)) + + if np.array(boundary).ndim == 1: + make_coord_delta_array(boundary) + raise NotImplementedError("Non-centered boundary not yet supported for dimension '%s'" % dim) + + if np.array(boundary).ndim == 2: + for elem in boundary: + make_coord_delta_array(elem) + raise NotImplementedError("Non-uniform boundary not yet supported for 
dimension '%s'" % dim) + + return val + + @tl.default("cache_output") + def _cache_output_default(self): + return settings["CACHE_DATASOURCE_OUTPUT_DEFAULT"] + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @@ -212,6 +247,22 @@ def interpolators(self): else: return OrderedDict() + @property + def coordinates(self): + """{coordinates}""" + + if self._coordinates is not None: + nc = self._coordinates + elif self.cache_coordinates and self.has_cache("coordinates"): + nc = self.get_cache("coordinates") + self.set_trait("_coordinates", nc) + else: + nc = self.get_coordinates() + self.set_trait("_coordinates", nc) + if self.cache_coordinates: + self.put_cache(nc, "coordinates") + return nc + # ------------------------------------------------------------------------------------------------------------------ # Private Methods # ------------------------------------------------------------------------------------------------------------------ @@ -267,9 +318,8 @@ def _get_data(self): udata_array = udata_array.sel(output=self.output) # fill nan_vals in data array - if self.nan_vals: - for nan_val in self.nan_vals: - udata_array.data[udata_array.data == nan_val] = np.nan + for nan_val in self.nan_vals: + udata_array.data[udata_array.data == nan_val] = np.nan return udata_array @@ -282,10 +332,10 @@ def _get_data(self): def eval(self, coordinates, output=None): """Evaluates this node using the supplied coordinates. - The native coordinates are mapped to the requested coordinates, interpolated if necessary, and set to + The coordinates are mapped to the requested coordinates, interpolated if necessary, and set to `_requested_source_coordinates` with associated index `_requested_source_coordinates_index`. The requested source coordinates and index are passed to `get_data()` returning the source data at the - native coordinatesset to `_requested_source_data`. Finally `_requested_source_data` is interpolated + coordinatesset to `_requested_source_data`. Finally `_requested_source_data` is interpolated using the `interpolate` method and set to the `output` attribute of the node. 
@@ -323,7 +373,7 @@ def eval(self, coordinates, output=None): self._original_requested_coordinates = coordinates # check for missing dimensions - for c in self.native_coordinates.values(): + for c in self.coordinates.values(): if isinstance(c, Coordinates1d): if c.name not in coordinates.udims: raise ValueError("Cannot evaluate these coordinates, missing dim '%s'" % c.name) @@ -335,10 +385,10 @@ def eval(self, coordinates, output=None): extra = [] for c in coordinates.values(): if isinstance(c, Coordinates1d): - if c.name not in self.native_coordinates.udims: + if c.name not in self.coordinates.udims: extra.append(c.name) elif isinstance(c, StackedCoordinates): - if all(dim not in self.native_coordinates.udims for dim in c.dims): + if all(dim not in self.coordinates.udims for dim in c.dims): extra.append(c.name) coordinates = coordinates.drop(extra) @@ -346,18 +396,16 @@ def eval(self, coordinates, output=None): self._evaluated_coordinates = deepcopy(coordinates) # transform coordinates into native crs if different - if self.native_coordinates.crs.lower() != coordinates.crs.lower(): - coordinates = coordinates.transform(self.native_coordinates.crs) + if self.coordinates.crs.lower() != coordinates.crs.lower(): + coordinates = coordinates.transform(self.coordinates.crs) - # intersect the native coordinates with requested coordinates - # to get native coordinates within requested coordinates bounds + # intersect the coordinates with requested coordinates to get coordinates within requested coordinates bounds # TODO: support coordinate_index_type parameter to define other index types - ( - self._requested_source_coordinates, - self._requested_source_coordinates_index, - ) = self.native_coordinates.intersect(coordinates, outer=True, return_indices=True) + (rsc, rsci) = self.coordinates.intersect(coordinates, outer=True, return_indices=True) + self._requested_source_coordinates = rsc + self._requested_source_coordinates_index = rsci - # if requested coordinates and native coordinates do not intersect, shortcut with nan UnitsDataArary + # if requested coordinates and coordinates do not intersect, shortcut with nan UnitsDataArary if self._requested_source_coordinates.size == 0: if output is None: output = self.create_output_array(self._evaluated_coordinates) @@ -371,12 +419,11 @@ def eval(self, coordinates, output=None): self._set_interpolation() # interpolate requested coordinates before getting data - ( - self._requested_source_coordinates, - self._requested_source_coordinates_index, - ) = self._interpolation.select_coordinates( + (rsc, rsci) = self._interpolation.select_coordinates( self._requested_source_coordinates, self._requested_source_coordinates_index, coordinates ) + self._requested_source_coordinates = rsc + self._requested_source_coordinates_index = rsci # Check the coordinate_index_type if self.coordinate_index_type == "slice": # Most restrictive @@ -404,7 +451,7 @@ def eval(self, coordinates, output=None): # if not provided, create output using the evaluated coordinates, or # if provided, set the order of coordinates to match the output dims - # Note that at this point the coordinates are in the same CRS as the native_coordinates + # Note that at this point the coordinates are in the same CRS as the coordinates if output is None: requested_dims = None output_dims = None @@ -426,6 +473,9 @@ def eval(self, coordinates, output=None): + "request Coordinates coordinate reference system ({})".format(coordinates.crs) ) + # get indexed boundary + self._requested_source_boundary = 
self._get_boundary(self._requested_source_coordinates_index) + # interpolate data into output output = self._interpolation.interpolate( self._requested_source_coordinates, self._requested_source_data, coordinates, output @@ -436,7 +486,7 @@ def eval(self, coordinates, output=None): o = o.transpose(*output_dims) o.data[:] = output.transpose(*output_dims).data - # if requested crs is differented than native coordinates, + # if requested crs is differented than coordinates, # fabricate a new output with the original coordinates and new values if self._evaluated_coordinates.crs != coordinates.crs: output = self.create_output_array(self._evaluated_coordinates, data=output[:].values) @@ -449,15 +499,15 @@ def eval(self, coordinates, output=None): def find_coordinates(self): """ - Get the available native coordinates for the Node. For a DataSource, this is just the native_coordinates. + Get the available coordinates for the Node. For a DataSource, this is just the coordinates. Returns ------- coords_list : list - singleton list containing the native_coordinates (Coordinates object) + singleton list containing the coordinates (Coordinates object) """ - return [self.native_coordinates] + return [self.coordinates] @common_doc(COMMON_DATA_DOC) def get_data(self, coordinates, coordinates_index): @@ -471,79 +521,52 @@ def get_data(self, coordinates, coordinates_index): raise NotImplementedError @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{get_native_coordinates} + def get_coordinates(self): + """{get_coordinates} Raises - -------- + ------ NotImplementedError - Raised if get_native_coordinates is not implemented by data source subclass. + This needs to be implemented by derived classes """ + raise NotImplementedError - if trait_is_defined(self, "native_coordinates"): - return self.native_coordinates - else: - raise NotImplementedError( - "{0}.native_coordinates is not defined and " - "{0}.get_native_coordinates() is not implemented".format(self.__class__.__name__) - ) + def set_coordinates(self, coordinates, force=False): + """ Set the coordinates. Used by Compositors as an optimization. - @property - @common_doc(COMMON_DATA_DOC) - def base_definition(self): - """Base node definition for DataSource nodes. - - Returns - ------- - {definition_return} + Arguments + --------- + coordinates : :class:`podpac.Coordinates` + Coordinates to set. Usually these are coordinates that are shared across compositor sources. + + NOTE: This is only currently used by SMAPCompositor. It should potentially be moved to the SMAPSource. """ - d = super(DataSource, self).base_definition - - # check attrs and remove unnecesary attrs - attrs = d.get("attrs", {}) - if "source" in attrs: - raise NodeException("The 'source' property cannot be tagged as an 'attr'") - if "interpolation" in attrs: - raise NodeException("The 'interpolation' property cannot be tagged as an 'attr'") - if "nan_vals" in attrs and not self.nan_vals: - del attrs["nan_vals"] - - # set source or lookup_source - if isinstance(self.source, Node): - d["lookup_source"] = self.source - elif isinstance(self.source, np.ndarray): - d["source"] = self.source.tolist() - else: - d["source"] = self.source + if force or not self.trait_is_defined("_coordinates"): + self.set_trait("_coordinates", coordinates) - # assign the interpolation definition - d["interpolation"] = self.interpolation + def _get_boundary(self, index): + """ + Select the boundary for the given the coordinates index. Only non-uniform boundary arrays need to be indexed. 
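Stepping back from the individual methods: with this refactor a custom DataSource implements `get_coordinates` (replacing `get_native_coordinates`) and `get_data`, as the updated docstrings above describe. A minimal hypothetical subclass, not part of the patch and assuming DataSource is still exported as `podpac.data.DataSource`:

    import numpy as np
    import podpac


    class MyRandomSource(podpac.data.DataSource):
        """Hypothetical source that returns random values over a fixed grid."""

        def get_coordinates(self):
            # replaces get_native_coordinates from earlier releases
            return podpac.Coordinates(
                [podpac.clinspace(40, 44, 5, "lat"), podpac.clinspace(-75, -72, 4, "lon")]
            )

        def get_data(self, coordinates, coordinates_index):
            data = np.random.rand(*coordinates.shape)
            return self.create_output_array(coordinates, data=data)


    node = MyRandomSource()
    output = node.eval(node.coordinates)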
- return d + Arguments + --------- + index : tuple + Coordinates index (e.g. coordinates_index) - # ------------------------------------------------------------------------------------------------------------------ - # Operators/Magic Methods - # ------------------------------------------------------------------------------------------------------------------ - def __repr__(self): - source_name = str(self.__class__.__name__) - - rep = "{}".format(source_name) - if source_name != "DataSource": - rep += " DataSource" - - source_disp = self.source if isinstance(self.source, string_types) else "\n{}".format(self.source) - rep += "\n\tsource: {}".format(source_disp) - if trait_is_defined(self, "native_coordinates"): - rep += "\n\tnative_coordinates: " - for c in self.native_coordinates.values(): - if isinstance(c, Coordinates1d): - rep += "\n\t\t%s: %s" % (c.name, c) - elif isinstance(c, StackedCoordinates): - for _c in c: - rep += "\n\t\t%s[%s]: %s" % (c.name, _c.name, _c) - - # rep += '{}: {}'.format(c.name, c) - rep += "\n\tinterpolation: {}".format(self.interpolation) - - return rep + Returns + ------- + boundary : dict + Indexed boundary. Uniform boundaries are unchanged and non-uniform boundary arrays are indexed. + """ + + boundary = {} + for c, I in zip(self.coordinates.values(), index): + for dim in c.dims: + if dim not in self.boundary: + pass + elif np.array(self.boundary[dim]).ndim == 2: + boundary[dim] = np.array(self.boundary[dim][I]) + else: + boundary[dim] = self.boundary[dim] + return boundary diff --git a/podpac/core/data/file.py b/podpac/core/data/file.py deleted file mode 100644 index 25995b044..000000000 --- a/podpac/core/data/file.py +++ /dev/null @@ -1,856 +0,0 @@ -""" -Datasources from files -""" - -from __future__ import division, unicode_literals, print_function, absolute_import - -from io import BytesIO -from collections import OrderedDict -from six import string_types - -import numpy as np -import traitlets as tl -import pandas as pd -import xarray as xr -import pyproj -import logging - -from podpac.core.settings import settings -from podpac.core.utils import common_doc, trait_is_defined -from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource -from podpac.core.coordinates import Coordinates, UniformCoordinates1d, ArrayCoordinates1d, StackedCoordinates -from podpac.core.coordinates import RotatedCoordinates -from podpac.core.coordinates.utils import Dimension, VALID_DIMENSION_NAMES - -# Optional dependencies -from lazy_import import lazy_module, lazy_class - -rasterio = lazy_module("rasterio") -h5py = lazy_module("h5py") -boto3 = lazy_module("boto3") -requests = lazy_module("requests") -zarr = lazy_module("zarr") -zarrGroup = lazy_class("zarr.Group") -s3fs = lazy_module("s3fs") - -# Set up logging -_logger = logging.getLogger(__name__) - - -@common_doc(COMMON_DATA_DOC) -class DatasetSource(DataSource): - """ - Base class for dataset/file datasources. - - This class facilitates setting the native_coordinates from the coordinates defined in the file, including - decoding datetimes when necessary. The coordinates are automatically read from the dataset when possible, and - methods are provided for customization when necessary. - - Attributes - ---------- - source : str - Path to the data source file. - dataset - Dataset object. 
- native_coordinates : Coordinates - {native_coordinates} - lat_key : str - latitude key, default 'lat' - lon_key : str - longitude key, default 'lon' - time_key : str - time key, default 'time' - alt_key : str - altitude key, default 'alt' - data_key : str - data key - output_keys : list - list of data keys, for multiple-output nodes - crs : str - Coordinate reference system of the coordinates. - cf_time : bool - decode CF datetimes - cf_units : str - units, when decoding CF datetimes - cf_calendar : str - calendar, when decoding CF datetimes - """ - - source = tl.Unicode(default_value=None, allow_none=True).tag(readonly=True) - data_key = tl.Unicode(allow_none=True).tag(attr=True) - output_keys = tl.List(allow_none=True).tag(attr=True) - lat_key = tl.Unicode(allow_none=True, default_value="lat").tag(attr=True) - lon_key = tl.Unicode(allow_none=True, default_value="lon").tag(attr=True) - time_key = tl.Unicode(allow_none=True, default_value="time").tag(attr=True) - alt_key = tl.Unicode(allow_none=True, default_value="alt").tag(attr=True) - crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - cf_time = tl.Bool(False).tag(attr=True) - cf_units = tl.Unicode(allow_none=True).tag(attr=True) - cf_calendar = tl.Unicode(allow_none=True).tag(attr=True) - - dataset = tl.Any().tag(readonly=True) - - @tl.default("data_key") - def _default_data_key(self): - return None - - @tl.default("output_keys") - def _default_output_keys(self): - return None - - @tl.validate("output") - def _validate_output(self, d): - return d["value"] - - def init(self): - super(DatasetSource, self).init() - - # check the dataset and dims - self.dataset - self.dims - - # validation and defaults for data_key, output_keys, outputs, and output - if self.data_key is not None and self.output_keys is not None: - raise TypeError("%s cannot have both 'data_key' or 'output_keys' defined" % self.__class__.__name__) - - if self.data_key is None and self.output_keys is None: - available_keys = self.available_keys - if len(available_keys) == 1: - self.set_trait("data_key", available_keys[0]) - else: - self.set_trait("output_keys", available_keys) - - if self.outputs is not None: - if self.data_key is not None: - raise TypeError("outputs must be None for single-output nodes") - if len(self.outputs) != len(self.output_keys): - raise ValueError( - "outputs and output_keys size mismatch (%d != %d)" % (len(self.outputs), len(self.output_keys)) - ) - else: - self.set_trait("outputs", self.output_keys) - - if self.output is not None: - if self.outputs is None: - raise TypeError("Invalid output '%s' (output must be None for single-output nodes)." 
% self.output) - if self.output not in self.outputs: - raise ValueError("Invalid output '%s' (available outputs are %s)" % (self.output, self.outputs)) - - @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{native_coordinates} - """ - cs = [] - dims = self.dims - for dim in dims: - if dim == "lat": - cs.append(self.get_lat()) - elif dim == "lon": - cs.append(self.get_lon()) - elif dim == "time": - cs.append(self.get_time()) - elif dim == "alt": - cs.append(self.get_alt()) - - return Coordinates(cs, dims=dims, crs=self.crs) - - def get_lat(self): - """Get the native latitude coordinates from the dataset.""" - return self.dataset[self.lat_key] - - def get_lon(self): - """Get the native longitude coordinates from the dataset.""" - return self.dataset[self.lon_key] - - def get_time(self): - """Get the native time coordinates from the dataset, decoding datetimes if requested.""" - values = self.dataset[self.time_key] - if self.cf_time: - values = xr.coding.times.decode_cf_datetime(values, self.cf_units, self.cf_calendar) - return values - - def get_alt(self): - """Get the native altitude coordinates from the dataset.""" - return self.dataset[self.alt_key] - - def close_dataset(self): - """ Close the dataset. Subclasses should implement as needed. """ - pass - - @property - def dims(self): - raise NotImplementedError - - @property - def available_keys(self): - raise NotImplementedError - - @property - @common_doc(COMMON_DATA_DOC) - def base_definition(self): - """Base node definition for DatasetSource nodes. - - Returns - ------- - {definition_return} - """ - - d = super(DatasetSource, self).base_definition - - # remove unnecessary attrs - attrs = d.get("attrs", {}) - if self.data_key is None and "data_key" in attrs: - del attrs["data_key"] - if self.output_keys is None and "output_keys" in attrs: - del attrs["output_keys"] - if self.crs is None and "crs" in attrs: - del attrs["crs"] - if self.outputs == self.output_keys and "outputs" in attrs: - del attrs["outputs"] - if "lat" not in self.dims and "lat_key" in attrs: - del attrs["lat_key"] - if "lon" not in self.dims and "lon_key" in attrs: - del attrs["lon_key"] - if "alt" not in self.dims and "alt_key" in attrs: - del attrs["alt_key"] - if "time" not in self.dims and "time_key" in attrs: - del attrs["time_key"] - if self.cf_time is False: - if "cf_time" in attrs: - del attrs["cf_time"] - if "cf_units" in attrs: - del attrs["cf_units"] - if "cf_calendar" in attrs: - del attrs["cf_calendar"] - - return d - - -@common_doc(COMMON_DATA_DOC) -class Dataset(DatasetSource): - """Create a DataSource node using xarray.open_dataset. - - Attributes - ---------- - source : str - Path to the dataset file. - dataset : xarray.Dataset - Dataset object. - native_coordinates : Coordinates - {native_coordinates} - data_key : str - data key, default 'data' - lat_key : str - latitude key, default 'lat' - lon_key : str - longitude key, default 'lon' - time_key : str - time key, default 'time' - alt_key : str - altitude key, default 'alt' - crs : str - Coordinate reference system of the coordinates - extra_dim : dict - In cases where the data contain dimensions other than ['lat', 'lon', 'time', 'alt'], these dimensions need to be selected. 
- For example, if the data contains ['lat', 'lon', 'channel'], the second channel can be selected using `extra_dim=dict(channel=1)` - """ - - dataset = tl.Instance(xr.Dataset).tag(readonly=True) - - # node attrs - extra_dim = tl.Dict(allow_none=True, default_value=None).tag(attr=True) - - @tl.default("dataset") - def _open_dataset(self): - return xr.open_dataset(self.source) - - def close_dataset(self): - self.dataset.close() - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data} - """ - if self.data_key is not None: - data = self.dataset[self.data_key] - data = data.transpose(*self.dataset.dims) - else: - data = self.dataset[self.output_keys].to_array(dim="output") - tdims = tuple(self.dataset.dims) + ("output",) - data = data.transpose(*tdims) - return self.create_output_array(coordinates, data.data[coordinates_index]) - - @property - def dims(self): - """dataset coordinate dims""" - lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} - for dim in self.dataset.dims: - if dim not in lookup: - raise ValueError( - "Unexpected dimension '%s' in xarray dataset (source '%s'). " - "Use 'lat_key', 'lon_key', 'time_key' and 'alt_key' to select dataset dimensions" - % (dim, self.source) - ) - - return [lookup[dim] for dim in self.dataset.dims] - - @property - def available_keys(self): - """available data keys""" - return list(self.dataset.keys()) - - @property - @common_doc(COMMON_DATA_DOC) - def base_definition(self): - """Base node definition for DatasetSource nodes. - - Returns - ------- - {definition_return} - """ - - d = super(Dataset, self).base_definition - - # remove unnecessary attrs - attrs = d.get("attrs", {}) - if self.extra_dim is None and "extra_dim" in attrs: - del attrs["extra_dim"] - - return d - - -@common_doc(COMMON_DATA_DOC) -class CSV(DatasetSource): - """Create a DataSource from a .csv file. - - This class assumes that the data has a storage format such as: - header 1, header 2, header 3, ... - row1_data1, row1_data2, row1_data3, ... - row2_data1, row2_data2, row2_data3, ... - - Attributes - ---------- - source : str - Path to the csv file - header : int, None - Row number containing the column names, default 0. Use None for no header. 
- dataset : pd.DataFrame - Raw Pandas DataFrame used to read the data - native_coordinates : Coordinates - {native_coordinates} - data_key : str, int - data column number or column title, default 'data' - lat_key : str, int - latitude column number or column title, default 'lat' - lon_key : str, int - longitude column number or column title, default 'lon' - time_key : str, int - time column number or column title, default 'time' - alt_key : str, int - altitude column number or column title, default 'alt' - crs : str - Coordinate reference system of the coordinates - """ - - header = tl.Any(default_value=0).tag(attr=True) - lat_key = tl.Union([tl.Unicode(), tl.Int()], default_value="lat").tag(attr=True) - lon_key = tl.Union([tl.Unicode(), tl.Int()], default_value="lon").tag(attr=True) - time_key = tl.Union([tl.Unicode(), tl.Int()], default_value="time").tag(attr=True) - alt_key = tl.Union([tl.Unicode(), tl.Int()], default_value="alt").tag(attr=True) - data_key = tl.Union([tl.Unicode(), tl.Int()], allow_none=True, default_value=None).tag(attr=True) - output_keys = tl.Union([tl.List(tl.Unicode()), tl.List(tl.Int())], allow_none=True, default_value=None).tag( - attr=True - ) - - dataset = tl.Instance(pd.DataFrame).tag(readonly=True) - - @tl.default("dataset") - def _open_dataset(self): - return pd.read_csv(self.source, parse_dates=True, infer_datetime_format=True, header=self.header) - - def _get_key(self, key): - return self.dataset.columns[key] if isinstance(key, int) else key - - def _get_col(self, key): - return key if isinstance(key, int) else self.dataset.columns.get_loc(key) - - def get_lat(self): - """Get latitude coordinates from the csv file.""" - return self.dataset[self._get_key(self.lat_key)].values - - def get_lon(self): - """Get longitude coordinates from the csv file.""" - return self.dataset[self._get_key(self.lon_key)].values - - def get_time(self): - """Get time coordinates from the csv file.""" - return self.dataset[self._get_key(self.time_key)].values - - def get_alt(self): - """Get altitude coordinates from the csv file.""" - return self.dataset[self._get_key(self.alt_key)].values - - @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{get_native_coordinates} - - Note: CSV files have StackedCoordinates. - """ - - coords = super(CSV, self).get_native_coordinates() - if len(coords) == 1: - return coords - stacked = StackedCoordinates(list(coords.values())) - return Coordinates([stacked], **coords.properties) - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data} - """ - if self.data_key is not None: - I = self._get_col(self.data_key) - else: - I = [self._get_col(key) for key in self.output_keys] - data = self.dataset.iloc[coordinates_index[0], I] - return self.create_output_array(coordinates, data=data) - - @property - def dims(self): - """dataset coordinate dims""" - lookup = { - self._get_key(self.lat_key): "lat", - self._get_key(self.lon_key): "lon", - self._get_key(self.alt_key): "alt", - self._get_key(self.time_key): "time", - } - return [lookup[key] for key in self.dataset.columns if key in lookup] - - @property - def available_keys(self): - """available data keys""" - dims_keys = [self.lat_key, self.lon_key, self.alt_key, self.time_key] - return [key for key in self.dataset.columns if key not in dims_keys] - - @property - @common_doc(COMMON_DATA_DOC) - def base_definition(self): - """Base node definition for DatasetSource nodes. 
- - Returns - ------- - {definition_return} - """ - - d = super(CSV, self).base_definition - - # remove unnecessary attrs - attrs = d.get("attrs", {}) - if self.header == 0 and "header" in attrs: - del attrs["header"] - - return d - - -@common_doc(COMMON_DATA_DOC) -class H5PY(DatasetSource): - """Create a DataSource node using h5py. - - Attributes - ---------- - source : str - Path to the h5py file - dataset : h5py.File - The h5py file object used to read the file - native_coordinates : Coordinates - {native_coordinates} - file_mode : str, optional - Default is 'r'. The mode used to open the HDF5 file. Options are r, r+, w, w- or x, a (see h5py.File). - data_key : str, int - data key, default 'data' - lat_key : str, int - latitude coordinates key, default 'lat' - lon_key : str, int - longitude coordinates key, default 'lon' - time_key : str, int - time coordinates key, default 'time' - alt_key : str, int - altitude coordinates key, default 'alt' - crs : str - Coordinate reference system of the coordinates - cf_time : bool - decode CF datetimes - cf_units : str - units, when decoding CF datetimes - cf_calendar : str - calendar, when decoding CF datetimes - """ - - file_mode = tl.Unicode(default_value="r").tag(readonly=True) - - dims = tl.List(allow_none=False) - - @tl.default("dims") - def _dims_default(self): - """dataset coordinate dims""" - key = self.data_key - if key is None: - key = self.available_keys[0] - try: - dims = self.dataset[key].attrs["_ARRAY_DIMENSIONS"] - except: - lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} - dims = [lookup[key] for key in H5PY._find_h5py_keys(self.dataset) if key in lookup] - return dims - - @tl.default("dataset") - def _open_dataset(self): - # TODO: dataset should not open by default - # prefer with as: syntax - return h5py.File(self.source, self.file_mode) - - def close_dataset(self): - """Closes the file. """ - self.dataset.close() - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data} - """ - data = self.create_output_array(coordinates) - if self.data_key is not None: - data[:] = self.dataset[self.data_key][coordinates_index] - else: - for key, name in zip(self.output_keys, self.outputs): - data.sel(output=name)[:] = self.dataset[key][coordinates_index] - return data - - def attrs(self, key="/"): - """Dataset or group key for which attributes will be summarized. - """ - return dict(self.dataset[key].attrs) - - @property - def available_keys(self): - dims_keys = [self.lat_key, self.lon_key, self.alt_key, self.time_key] - return [key for key in H5PY._find_h5py_keys(self.dataset) if key not in dims_keys] - - @staticmethod - def _find_h5py_keys(obj, keys=[]): - if isinstance(obj, (h5py.Group, h5py.File)): - for k in obj.keys(): - keys = H5PY._find_h5py_keys(obj[k], keys) - else: - keys.append(obj.name) - return keys - keys = list(set(keys)) - keys.sort() - return keys - - -class Zarr(DatasetSource): - """Create a DataSource node using zarr. - - Attributes - ---------- - source : str - Path to the Zarr archive - file_mode : str, optional - Default is 'r'. The mode used to open the Zarr archive. Options are r, r+, w, w- or x, a. 
- dataset : zarr.Group - The h5py file object used to read the file - native_coordinates : Coordinates - {native_coordinates} - data_key : str, int - data key, default 'data' - lat_key : str, int - latitude coordinates key, default 'lat' - lon_key : str, int - longitude coordinates key, default 'lon' - time_key : str, int - time coordinates key, default 'time' - alt_key : str, int - altitude coordinates key, default 'alt' - crs : str - Coordinate reference system of the coordinates - cf_time : bool - decode CF datetimes - cf_units : str - units, when decoding CF datetimes - cf_calendar : str - calendar, when decoding CF datetimes - """ - - file_mode = tl.Unicode(default_value="r").tag(readonly=True) - - # optional inputs - access_key_id = tl.Unicode() - secret_access_key = tl.Unicode() - region_name = tl.Unicode() - dims = tl.List(allow_none=False) - coordinate_index_type = "slice" - - @tl.default("dims") - def _dims_default(self): - """dataset coordinate dims""" - key = self.data_key - if key is None: - key = self.available_keys[0] - try: - dims = self.dataset[key].attrs["_ARRAY_DIMENSIONS"] - except: - lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} - dims = [lookup[key] for key in self.dataset if key in lookup] - return dims - - @tl.default("access_key_id") - def _get_access_key_id(self): - return settings["AWS_ACCESS_KEY_ID"] - - @tl.default("secret_access_key") - def _get_secret_access_key(self): - return settings["AWS_SECRET_ACCESS_KEY"] - - @tl.default("region_name") - def _get_region_name(self): - return settings["AWS_REGION_NAME"] - - @tl.default("dataset") - def _open_dataset(self): - if self.source is None: - raise TypeError("Zarr node requires 'source' or 'dataset'") - - if self.source.startswith("s3://"): - root = self.source.strip("s3://") - kwargs = {"region_name": self.region_name} - s3 = s3fs.S3FileSystem(key=self.access_key_id, secret=self.secret_access_key, client_kwargs=kwargs) - s3map = s3fs.S3Map(root=root, s3=s3, check=False) - store = s3map - else: - store = str(self.source) # has to be a string in Python2.7 for local files - - try: - return zarr.open(store, mode=self.file_mode) - except ValueError: - raise ValueError("No Zarr store found at path '%s'" % self.source) - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data} - """ - data = self.create_output_array(coordinates) - if self.data_key is not None: - data[:] = self.dataset[self.data_key][coordinates_index] - else: - for key, name in zip(self.output_keys, self.outputs): - data.sel(output=name)[:] = self.dataset[key][coordinates_index] - return data - - @property - def available_keys(self): - """available data keys""" - dim_keys = [self.lat_key, self.lon_key, self.alt_key, self.time_key] - return [key for key in self.dataset if key not in dim_keys] - - -# TODO -@common_doc(COMMON_DATA_DOC) -class Rasterio(DataSource): - r"""Create a DataSource using Rasterio. - - Parameters - ---------- - source : str, :class:`io.BytesIO` - Path to the data source - band : int - The 'band' or index for the variable being accessed in files such as GeoTIFFs - - Attributes - ---------- - dataset : :class:`rasterio._io.RasterReader` - A reference to the datasource opened by rasterio - native_coordinates : :class:`podpac.Coordinates` - {native_coordinates} - crs : str, optional - The coordinate reference system. 
Normally this will come directly from the file, but this allows users to - specify the crs in case this information is missing from the file. - - Notes - ------ - The source could be a path to an s3 bucket file, e.g.: s3://landsat-pds/L8/139/045/LC81390452014295LGN00/LC81390452014295LGN00_B1.TIF - In that case, make sure to set the environmental variable: - * Windows: set CURL_CA_BUNDLE=\Library\ssl\cacert.pem - * Linux: export CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - """ - - source = tl.Union([tl.Unicode(), tl.Instance(BytesIO)]).tag(readonly=True) - dataset = tl.Any().tag(readonly=True) - crs = tl.Unicode(allow_none=True) - - @property - def nan_vals(self): - return list(self.dataset.nodatavals) - - # node attrs - band = tl.CInt(allow_none=True).tag(attr=True) - - @tl.default("band") - def _band_default(self): - if (self.outputs is not None) and (self.output is not None): - band = self.outputs.index(self.output) - elif self.outputs is None: - band = 1 - else: - band = None # All bands - return band - - @tl.default("dataset") - def _open_dataset(self): - """Opens the data source - - Returns - ------- - :class:`rasterio.io.DatasetReader` - Rasterio dataset - """ - - # TODO: dataset should not open by default - # prefer with as: syntax - - if isinstance(self.source, BytesIO): - # https://rasterio.readthedocs.io/en/latest/topics/memory-files.html - # TODO: this is still not working quite right - likely need to work - # out the BytesIO format or how we are going to read/write in memory - with rasterio.MemoryFile(self.source) as memfile: - return memfile.open(driver="GTiff") - - # local file - else: - return rasterio.open(self.source) - - def close_dataset(self): - """Closes the file for the datasource - """ - self.dataset.close() - - @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{get_native_coordinates} - - The default implementation tries to find the lat/lon coordinates based on dataset.affine. - It cannot determine the alt or time dimensions, so child classes may - have to overload this method. 
- """ - - # check to see if the coordinates are rotated used affine - affine = self.dataset.transform - - if isinstance(self.dataset.crs, rasterio.crs.CRS) and "init" in self.dataset.crs: - crs = self.dataset.crs["init"].upper() - elif isinstance(self.dataset.crs, dict) and "init" in self.dataset.crs: - crs = self.dataset.crs["init"].upper() - else: - try: - crs = pyproj.CRS(self.dataset.crs).to_wkt() - except pyproj.exceptions.CRSError: - if self.crs is None: - raise RuntimeError("Unexpected rasterio crs '%s'" % self.dataset.crs) - else: - crs = self.crs - - return Coordinates.from_geotransform(affine.to_gdal(), self.dataset.shape, crs) - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data} - """ - data = self.create_output_array(coordinates) - slc = coordinates_index - - # read data within coordinates_index window - window = ((slc[0].start, slc[0].stop), (slc[1].start, slc[1].stop)) - - if self.outputs is not None: # read all the bands - raster_data = self.dataset.read(out_shape=(len(self.outputs),) + tuple(coordinates.shape), window=window) - raster_data = np.moveaxis(raster_data, 0, 2) - else: # read the requested band - raster_data = self.dataset.read(self.band, out_shape=tuple(coordinates.shape), window=window) - - # set raster data to output array - data.data.ravel()[:] = raster_data.ravel() - return data - - @property - def band_count(self): - """The number of bands - - Returns - ------- - int - The number of bands in the dataset - """ - - if not hasattr(self, "_band_count"): - self._band_count = self.dataset.count - - return self._band_count - - @property - def band_descriptions(self): - """A description of each band contained in dataset.tags - - Returns - ------- - OrderedDict - Dictionary of band_number: band_description pairs. The band_description values are a dictionary, each - containing a number of keys -- depending on the metadata - """ - - if not hasattr(self, "_band_descriptions"): - self._band_descriptions = OrderedDict((i, self.dataset.tags(i + 1)) for i in range(self.band_count)) - - return self._band_descriptions - - @property - def band_keys(self): - """An alternative view of band_descriptions based on the keys present in the metadata - - Returns - ------- - dict - Dictionary of metadata keys, where the values are the value of the key for each band. - For example, band_keys['TIME'] = ['2015', '2016', '2017'] for a dataset with three bands. - """ - - if not hasattr(self, "_band_keys"): - keys = {k for i in range(self.band_count) for k in self.band_descriptions[i]} # set - self._band_keys = {k: [self.band_descriptions[i].get(k) for i in range(self.band_count)] for k in keys} - - return self._band_keys - - def get_band_numbers(self, key, value): - """Return the bands that have a key equal to a specified value. - - Parameters - ---------- - key : str / list - Key present in the metadata of the band. Can be a single key, or a list of keys. - value : str / list - Value of the key that should be returned. 
Can be a single value, or a list of values - - Returns - ------- - np.ndarray - An array of band numbers that match the criteria - """ - if (not hasattr(key, "__iter__") or isinstance(key, string_types)) and ( - not hasattr(value, "__iter__") or isinstance(value, string_types) - ): - key = [key] - value = [value] - - match = np.ones(self.band_count, bool) - for k, v in zip(key, value): - match = match & (np.array(self.band_keys[k]) == v) - matches = np.where(match)[0] + 1 - - return matches diff --git a/podpac/core/data/file_source.py b/podpac/core/data/file_source.py new file mode 100644 index 000000000..3516b48f3 --- /dev/null +++ b/podpac/core/data/file_source.py @@ -0,0 +1,249 @@ +""" +Datasources from files +""" + +from __future__ import division, unicode_literals, print_function, absolute_import + +import sys + +if sys.version_info.major == 2: + from urllib2 import urlopen +else: + from urllib.request import urlopen + +from io import BytesIO +import logging + +import traitlets as tl +import xarray as xr + +from lazy_import import lazy_module, lazy_class + +boto3 = lazy_module("boto3") +s3fs = lazy_module("s3fs") +requests = lazy_module("requests") + +from podpac.core.utils import common_doc, cached_property +from podpac.core.coordinates import Coordinates +from podpac.core.authentication import S3Mixin +from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource + +# TODO common doc +_logger = logging.getLogger(__name__) + + +class BaseFileSource(DataSource): + """ + Base class for data sources loaded from file. + + Attributes + ---------- + source : str + Path to the data source. + coordinates : :class:`podpac.Coordinates` + {coordinates} + dataset : Any + dataset object + """ + + source = tl.Unicode().tag(attr=True) + + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["source", "interpolation"] + + @tl.default("source") + def _default_source(self): + raise ValueError("%s 'source' required" % self.__class__.__name__) + + # ------------------------------------------------------------------------- + # public api properties and methods + # ------------------------------------------------------------------------- + + @property + def dataset(self): + raise NotImplementedError() + + def close_dataset(self): + """ Close opened resources. Subclasses should implement if appropriate. """ + pass + + +class LoadFileMixin(S3Mixin): + """ + Mixin to load and cache files using various transport protocols. + + Attributes + ---------- + cache_dataset : bool + Default is False. Whether to cache the dataset after loading (as an optimization). 
+ """ + + cache_dataset = tl.Bool(False) + + @cached_property + def _dataset_caching_node(self): + # stub node containing only the source node attr + return BaseFileSource(source=self.source, cache_ctrl=self.cache_ctrl) + + @cached_property + def dataset(self): + # use the _dataset_caching_node "stub" here because the only node attr we care about is the source + if self.cache_dataset and self._dataset_caching_node.has_cache(key="dataset"): + data = self._dataset_caching_node.get_cache(key="dataset") + with BytesIO(data) as f: + return self._open(BytesIO(data), cache=False) + elif self.source.startswith("s3://"): + _logger.info("Loading AWS resource: %s" % self.source) + with self.s3.open(self.source, "rb") as f: + return self._open(f) + elif self.source.startswith("http://") or self.source.startswith("https://"): + _logger.info("Downloading: %s" % self.source) + response = requests.get(self.source) + with BytesIO(response.content) as f: + return self._open(f) + elif self.source.startswith("ftp://"): + _logger.info("Downloading: %s" % self.source) + addinfourl = urlopen(self.source) + with BytesIO(addinfourl.read()) as f: + return self._open(f) + elif self.source.startswith("file://"): + addinfourl = urlopen(self.source) + with BytesIO(addinfourl.read()) as f: + return self._open(f) + else: + with open(self.source, "rb") as f: + return self._open(f) + + def _open(self, f, cache=True): + if self.cache_dataset and cache: + self._dataset_caching_node.put_cache(f.read(), key="dataset") + f.seek(0) + return self.open_dataset(f) + + def open_dataset(self, f): + """ TODO """ + raise NotImplementedError() + + +@common_doc(COMMON_DATA_DOC) +class FileKeysMixin(tl.HasTraits): + """ + Mixin to specify data and coordinates dimensions keys. + + Attributes + ---------- + lat_key : str + latitude key, default 'lat' + lon_key : str + longitude key, default 'lon' + time_key : str + time key, default 'time' + alt_key : str + altitude key, default 'alt' + data_key : str, list + data key, or list of data keys for multiple-output nodes + crs : str + Coordinate reference system of the coordinates. 
+ cf_time : bool + decode CF datetimes + cf_units : str + units, when decoding CF datetimes + cf_calendar : str + calendar, when decoding CF datetimes + """ + + data_key = tl.Union([tl.Unicode(), tl.List(trait=tl.Unicode())]).tag(attr=True) + lat_key = tl.Unicode(default_value="lat").tag(attr=True) + lon_key = tl.Unicode(default_value="lon").tag(attr=True) + time_key = tl.Unicode(default_value="time").tag(attr=True) + alt_key = tl.Unicode(default_value="alt").tag(attr=True) + crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + cf_time = tl.Bool(default_value=False).tag(attr=True) + cf_units = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + cf_calendar = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + skip_validation = tl.Bool(False).tag(attr=True) + + @property + def _repr_keys(self): + """ list of attribute names, used by __repr__ and __str__ to display minimal info about the node""" + keys = ["source", "interpolation"] + if len(self.available_data_keys) > 1 and not isinstance(self.data_key, list): + keys.append("data_key") + return keys + + @tl.default("data_key") + def _default_data_key(self): + if len(self.available_data_keys) == 1: + return self.available_data_keys[0] + else: + return self.available_data_keys + + @tl.validate("data_key") + def _validate_data_key(self, d): + keys = d["value"] + if self.skip_validation: + return keys + if not isinstance(keys, list): + keys = [d["value"]] + for key in keys: + if key not in self.available_data_keys: + raise ValueError("Invalid data_key '%s', available keys are %s" % (key, self.available_data_keys)) + return d["value"] + + @tl.default("outputs") + def _default_outputs(self): + if not isinstance(self.data_key, list): + return None + else: + return self.data_key + + @tl.validate("outputs") + def _validate_outputs(self, d): + value = d["value"] + if self.skip_validation: + return value + if not isinstance(self.data_key, list): + if value is not None: + raise TypeError("outputs must be None for single-output nodes") + else: + if value is None: + raise TypeError("outputs and data_key mismatch (outputs=None, data_key=%s)" % self.data_key) + if len(value) != len(self.data_key): + raise ValueError("outputs and data_key size mismatch (%d != %d)" % (len(value), len(self.data_key))) + return value + + # ------------------------------------------------------------------------- + # public api properties and methods + # ------------------------------------------------------------------------- + + @property + def keys(self): + raise NotImplementedError + + @property + def dims(self): + raise NotImplementedError + + @cached_property + def available_data_keys(self): + """available data keys""" + dim_keys = [self.lat_key, self.lon_key, self.alt_key, self.time_key] + keys = [key for key in self.keys if key not in dim_keys] + if len(keys) == 0: + raise ValueError("No data keys found in '%s'" % self.source) + return keys + + def _lookup_key(self, dim): + lookup = {"lat": self.lat_key, "lon": self.lon_key, "alt": self.alt_key, "time": self.time_key} + return lookup[dim] + + @common_doc(COMMON_DATA_DOC) + def get_coordinates(self): + """{get_coordinates} + """ + + cs = [self.dataset[self._lookup_key(dim)] for dim in self.dims] + if self.cf_time and "time" in self.dims: + time_ind = self.dims.index("time") + cs[time_ind] = xr.coding.times.decode_cf_datetime(cs[time_ind], self.cf_units, self.cf_calendar) + return Coordinates(cs, dims=self.dims, crs=self.crs) diff --git a/podpac/core/data/h5py_source.py 
b/podpac/core/data/h5py_source.py new file mode 100644 index 000000000..7cb89a1c6 --- /dev/null +++ b/podpac/core/data/h5py_source.py @@ -0,0 +1,118 @@ +import traitlets as tl + +from lazy_import import lazy_module, lazy_class + +h5py = lazy_module("h5py") + +from podpac.core.utils import common_doc, cached_property +from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC +from podpac.core.data.file_source import BaseFileSource, FileKeysMixin + + +@common_doc(COMMON_DATA_DOC) +class H5PY(FileKeysMixin, BaseFileSource): + """Create a DataSource node using h5py. + + Attributes + ---------- + source : str + Path to the h5py file + dataset : h5py.File + The h5py file object used to read the file + coordinates : :class:`podpac.Coordinates` + {coordinates} + file_mode : str, optional + Default is 'r'. The mode used to open the HDF5 file. Options are r, r+, w, w- or x, a (see h5py.File). + data_key : str, int + data key, default 'data' + lat_key : str, int + latitude coordinates key, default 'lat' + lon_key : str, int + longitude coordinates key, default 'lon' + time_key : str, int + time coordinates key, default 'time' + alt_key : str, int + altitude coordinates key, default 'alt', + array_dims : list of str + dataset dims, default ['lat', 'lon', 'alt', time'], for each _key defined + crs : str + Coordinate reference system of the coordinates + cf_time : bool + decode CF datetimes + cf_units : str + units, when decoding CF datetimes + cf_calendar : str + calendar, when decoding CF datetimes + """ + + file_mode = tl.Unicode(default_value="r").tag(readonly=True) + array_dims = tl.List(trait=tl.Unicode()).tag(readonly=True) + + @cached_property + def dataset(self): + return h5py.File(self.source, self.file_mode) + + def close_dataset(self): + """Closes the file. """ + self.dataset.close() + + # ------------------------------------------------------------------------- + # public api methods + # ------------------------------------------------------------------------- + + @cached_property + def dims(self): + """ dataset coordinate dims """ + try: + if not isinstance(self.data_key, list): + key = self.data_key + else: + key = self.data_key[0] + return self.dataset[key].attrs["_ARRAY_DIMENSIONS"] + except: + lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} + + # make sure array_dim key is in self.keys + if self.array_dims: + inv_lookup = {v: k for k, v in lookup.items()} + return [key for key in self.array_dims if inv_lookup[key] in self.keys] + else: + return [lookup[key] for key in self.keys if key in lookup] + + @cached_property + def keys(self): + return H5PY._find_h5py_keys(self.dataset) + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + data = self.create_output_array(coordinates) + if not isinstance(self.data_key, list): + data[:] = self.dataset[self.data_key][coordinates_index] + else: + for key, name in zip(self.data_key, self.outputs): + data.sel(output=name)[:] = self.dataset[key][coordinates_index] + return data + + # ------------------------------------------------------------------------- + # additional methods and properties + # ------------------------------------------------------------------------- + + def dataset_attrs(self, key="/"): + """Dataset or group key for which attributes will be summarized. 
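A hedged usage sketch of the new `H5PY` node follows; the HDF5 file and its keys are hypothetical.

```python
from podpac.core.data.h5py_source import H5PY

node = H5PY(source="data.h5", data_key="data", lat_key="lat", lon_key="lon")
print(node.dims)                  # from '_ARRAY_DIMENSIONS' attrs, or inferred from the keys
print(node.available_data_keys)   # non-dimension keys found in the file
output = node.eval(node.coordinates)
node.close_dataset()              # release the underlying h5py.File handle
```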
+ """ + return dict(self.dataset[key].attrs) + + @staticmethod + def _find_h5py_keys(obj, keys=[]): + # recursively find keys + + if isinstance(obj, (h5py.Group, h5py.File)): + for k in obj.keys(): + keys = H5PY._find_h5py_keys(obj[k], keys) + else: + keys.append(obj.name) + return keys + keys = sorted(list(set(keys))) + return keys diff --git a/podpac/core/data/ogc.py b/podpac/core/data/ogc.py index 6252b5b89..1ab5bb4f9 100644 --- a/podpac/core/data/ogc.py +++ b/podpac/core/data/ogc.py @@ -12,7 +12,7 @@ import traitlets as tl from podpac.core.settings import settings -from podpac.core.utils import common_doc +from podpac.core.utils import common_doc, cached_property from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource from podpac.core.coordinates import Coordinates, UniformCoordinates1d, ArrayCoordinates1d @@ -47,17 +47,17 @@ class WCS(DataSource): URL of the WCS server endpoint version : str Default is 1.0.0. WCS version string. - wcs_coordinates : Coordinates + wcs_coordinates : :class:`podpac.Coordinates` The coordinates of the WCS source """ - source = tl.Unicode().tag(readonly=True) - wcs_coordinates = tl.Instance(Coordinates).tag(readonly=True) # default below - - # node attrs + source = tl.Unicode().tag(attr=True) layer_name = tl.Unicode().tag(attr=True) - version = tl.Unicode(WCS_DEFAULT_VERSION).tag(attr=True) - crs = tl.Unicode(WCS_DEFAULT_CRS).tag(attr=True) + version = tl.Unicode(default_value=WCS_DEFAULT_VERSION).tag(attr=True) + crs = tl.Unicode(default_value=WCS_DEFAULT_CRS).tag(attr=True) + + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["source", "interpolation"] _get_capabilities_qs = tl.Unicode("SERVICE=WCS&REQUEST=DescribeCoverage&" "VERSION={version}&COVERAGE={layer}") _get_data_qs = tl.Unicode( @@ -67,9 +67,8 @@ class WCS(DataSource): "WIDTH={width}&HEIGHT={height}&TIME={time}" ) - # TODO: This should be capabilities_url, not get_ @property - def get_capabilities_url(self): + def capabilities_url(self): """Constructs the url that requests the WCS capabilities Returns @@ -77,16 +76,16 @@ def get_capabilities_url(self): str The url that requests the WCS capabilities """ + return self.source + "?" + self._get_capabilities_qs.format(version=self.version, layer=self.layer_name) - @tl.default("wcs_coordinates") - def get_wcs_coordinates(self): - """Retrieves the native coordinates reported by the WCS service. + @cached_property + def wcs_coordinates(self): + """ Coordinates reported by the WCS service. Returns ------- Coordinates - The native coordinates reported by the WCS service. Notes ------- @@ -97,8 +96,9 @@ def get_wcs_coordinates(self): Exception Raises this if the required dependencies are not installed. 
""" + if requests is not None: - capabilities = requests.get(self.get_capabilities_url) + capabilities = requests.get(self.capabilities_url) if capabilities.status_code != 200: raise Exception("Could not get capabilities from WCS server") capabilities = capabilities.text @@ -110,10 +110,10 @@ def get_wcs_coordinates(self): else: http = urllib3.PoolManager() - r = http.request("GET", self.get_capabilities_url) + r = http.request("GET", self.capabilities_url) capabilities = r.data if r.status != 200: - raise Exception("Could not get capabilities from WCS server:" + self.get_capabilities_url) + raise Exception("Could not get capabilities from WCS server:" + self.capabilities_url) else: raise Exception("Do not have a URL request library to get WCS data.") @@ -166,15 +166,9 @@ def get_wcs_coordinates(self): ] ) - @property @common_doc(COMMON_DATA_DOC) - def native_coordinates(self): - """{native_coordinates} - - Returns - ------- - Coordinates - {native_coordinates} + def get_coordinates(self): + """{get_coordinates} Notes ------ @@ -214,6 +208,11 @@ def get_data(self, coordinates, coordinates_index): output = self.create_output_array(coordinates) dotime = "time" in self.wcs_coordinates.dims + wbound = coordinates["lon"].bounds[0] - coordinates["lon"].step / 2.0 + ebound = coordinates["lon"].bounds[1] + coordinates["lon"].step / 2.0 + sbound = coordinates["lat"].bounds[0] - coordinates["lat"].step / 2.0 + nbound = coordinates["lat"].bounds[1] + coordinates["lat"].step / 2.0 + if "time" in coordinates.dims and dotime: sd = np.timedelta64(0, "s") times = [str(t + sd) for t in coordinates["time"].coordinates] @@ -228,10 +227,10 @@ def get_data(self, coordinates, coordinates_index): + self._get_data_qs.format( version=self.version, layer=self.layer_name, - w=min(coordinates["lon"].area_bounds), - e=max(coordinates["lon"].area_bounds), - s=min(coordinates["lat"].area_bounds), - n=max(coordinates["lat"].area_bounds), + w=wbound, + e=ebound, + s=sbound, + n=nbound, width=coordinates["lon"].size, height=coordinates["lat"].size, time=time, @@ -269,7 +268,7 @@ def get_data(self, coordinates, coordinates_index): output.data[i, ...] 
= dataset.read() except Exception as e: # Probably python 2 print(e) - tmppath = os.path.join(settings["DISK_CACHE_DIR"], "wcs_temp.tiff") + tmppath = os.path.join(settings.cache_path, "wcs_temp.tiff") if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) @@ -297,10 +296,10 @@ def get_data(self, coordinates, coordinates_index): + self._get_data_qs.format( version=self.version, layer=self.layer_name, - w=min(coordinates["lon"].area_bounds), - e=max(coordinates["lon"].area_bounds), - s=min(coordinates["lat"].area_bounds), - n=max(coordinates["lat"].area_bounds), + w=wbound, + e=ebound, + s=sbound, + n=nbound, width=coordinates["lon"].size, height=coordinates["lat"].size, time=time, @@ -339,7 +338,7 @@ def get_data(self, coordinates, coordinates_index): output.data[:] = dataset.read() except Exception as e: # Probably python 2 print(e) - tmppath = os.path.join(settings["DISK_CACHE_DIR"], "wcs_temp.tiff") + tmppath = os.path.join(settings.cache_path, "wcs_temp.tiff") if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) open(tmppath, "wb").write(content) @@ -365,11 +364,8 @@ def get_data(self, coordinates, coordinates_index): @property def base_ref(self): - """Summary - - Returns - ------- - TYPE - Description - """ + """ definition base_ref """ + if not self.layer_name: + return super(WCS, self).base_ref + return self.layer_name.rsplit(".", 1)[1] diff --git a/podpac/core/data/pydap_source.py b/podpac/core/data/pydap_source.py index e53220c18..03b061c1f 100644 --- a/podpac/core/data/pydap_source.py +++ b/podpac/core/data/pydap_source.py @@ -4,15 +4,18 @@ from __future__ import division, unicode_literals, print_function, absolute_import +import logging + import numpy as np import traitlets as tl +import requests # Helper utility for optional imports from lazy_import import lazy_module, lazy_class # Internal dependencies from podpac.core import authentication -from podpac.core.utils import common_doc +from podpac.core.utils import common_doc, cached_property from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource # Optional dependencies @@ -21,137 +24,88 @@ lazy_module("pydap.model") +_logger = logging.getLogger(__name__) + + @common_doc(COMMON_DATA_DOC) -class PyDAP(DataSource): +class PyDAP(authentication.RequestsSessionMixin, DataSource): """Create a DataSource from an OpenDAP server feed. Attributes ---------- - auth_class : :class:`podpac.authentication.Session` - :class:`requests.Session` derived class providing authentication credentials. - When username and password are provided, an auth_session is created using this class. - auth_session : :class:`podpac.authentication.Session` - Instance of the auth_class. This is created if username and password is supplied, but this object can also be - supplied directly - datakey : str + data_key : str Pydap 'key' for the data to be retrieved from the server. Datasource may have multiple keys, so this key determines which variable is returned from the source. dataset : pydap.model.DatasetType The open pydap dataset. This is provided for troubleshooting. - native_coordinates : Coordinates - {native_coordinates} - password : str, optional - Password used for authenticating against OpenDAP server. WARNING: this is stored as plain-text, provide - auth_session instead if you have security concerns. + coordinates : :class:`podpac.Coordinates` + {coordinates} source : str URL of the OpenDAP server. 
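The half-step padding introduced in `WCS.get_data` above can be checked in isolation; this sketch assumes uniform lat/lon coordinates and only reproduces the arithmetic, not an actual WCS request.

```python
from podpac.core.coordinates import Coordinates, clinspace

coords = Coordinates([clinspace(40, 41, 11), clinspace(-77, -76, 11)], dims=["lat", "lon"])
lon = coords["lon"]

# widen the requested bounding box by half a step on each side, as in WCS.get_data,
# so the returned grid cells line up with the requested coordinate centers
wbound = lon.bounds[0] - lon.step / 2.0
ebound = lon.bounds[1] + lon.step / 2.0
print(wbound, ebound)  # approximately -77.05 -75.95
```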
- username : str, optional - Username used for authenticating against OpenDAP server. WARNING: this is stored as plain-text, provide - auth_session instead if you have security concerns. """ - source = tl.Unicode().tag(readonly=True) - dataset = tl.Instance("pydap.model.DatasetType").tag(readonly=True) - - # node attrs - datakey = tl.Unicode().tag(attr=True) - - # optional inputs - auth_class = tl.Type(authentication.Session) - auth_session = tl.Instance(authentication.Session, allow_none=True) - username = tl.Unicode(default_value=None, allow_none=True) - password = tl.Unicode(default_value=None, allow_none=True) - - @tl.default("auth_session") - def _auth_session_default(self): + source = tl.Unicode().tag(attr=True) + data_key = tl.Unicode().tag(attr=True) - # requires username and password - if not self.username or not self.password: - return None + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["source", "interpolation"] - # requires auth_class - # TODO: default auth_class? - if not self.auth_class: - return None - - # instantiate and check utl + # hostname for RequestsSession is source. Try parsing off netloc + @tl.default("hostname") + def _hostname(self): try: - session = self.auth_class(username=self.username, password=self.password) - session.get(self.source + ".dds") + return requests.utils.urlparse(self.source).netloc except: - # TODO: catch a 403 error - return None - - return session + return self.source - @tl.default("dataset") - def _open_dataset(self): - """Summary - - Parameters - ---------- - source : str, optional - Description + @common_doc(COMMON_DATA_DOC) + def get_coordinates(self): + """{get_coordinates} - Returns - ------- - TYPE - Description + Raises + ------ + NotImplementedError + PyDAP cannot create coordinates. A child class must implement this method. """ + raise NotImplementedError("PyDAP cannot create coordinates. A child class must implement this method.") + @cached_property + def dataset(self): # auth session - # if self.auth_session: try: - dataset = self._open_url() + return self._open_url() except Exception: # TODO handle a 403 error # TODO: Check Url (probably inefficient...) try: - self.auth_session.get(self.source + ".dds") - dataset = self._open_url() + self.session.get(self.source + ".dds") + return self._open_url() except Exception: # TODO: handle 403 error - print("Warning, dataset could not be opened. Check login credentials.") - dataset = None - - return dataset + _logger.exception("Error opening PyDap url '%s'" % self.source) + raise RuntimeError("Could not open PyDap url '%s'.\nCheck login credentials." % self.source) def _open_url(self): - return pydap.client.open_url(self.source, session=self.auth_session) - - @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{get_native_coordinates} - - Raises - ------ - NotImplementedError - DAP has no mechanism for creating coordinates automatically, so this is left up to child classes. - """ - raise NotImplementedError( - "DAP has no mechanism for creating coordinates" - + ", so this is left up to child class " - + "implementations." 
- ) + return pydap.client.open_url(self.source, session=self.session) @common_doc(COMMON_DATA_DOC) def get_data(self, coordinates, coordinates_index): """{get_data} """ - data = self.dataset[self.datakey][tuple(coordinates_index)] + data = self.dataset[self.data_key][tuple(coordinates_index)] # PyDAP 3.2.1 gives a numpy array for the above, whereas 3.2.2 needs the .data attribute to get a numpy array if not isinstance(data, np.ndarray) and hasattr(data, "data"): data = data.data d = self.create_output_array(coordinates, data=data.reshape(coordinates.shape)) return d - @property + @cached_property def keys(self): """The list of available keys from the OpenDAP dataset. Returns ------- List - The list of available keys from the OpenDAP dataset. Any of these keys can be set as self.datakey + The list of available keys from the OpenDAP dataset. Any of these keys can be set as self.data_key """ return self.dataset.keys() diff --git a/podpac/core/data/rasterio_source.py b/podpac/core/data/rasterio_source.py new file mode 100644 index 000000000..c3494a476 --- /dev/null +++ b/podpac/core/data/rasterio_source.py @@ -0,0 +1,181 @@ +from __future__ import division, unicode_literals, print_function, absolute_import + +from collections import OrderedDict +import io + +from six import string_types +import traitlets as tl +import numpy as np +import pyproj + +from lazy_import import lazy_module + +rasterio = lazy_module("rasterio") + +from podpac.core.utils import common_doc, cached_property +from podpac.core.coordinates import UniformCoordinates1d, Coordinates +from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC +from podpac.core.data.file_source import BaseFileSource, LoadFileMixin + + +@common_doc(COMMON_DATA_DOC) +class Rasterio(LoadFileMixin, BaseFileSource): + """Create a DataSource using rasterio. + + Attributes + ---------- + source : str, :class:`io.BytesIO` + Path to the data source + dataset : :class:`rasterio._io.RasterReader` + A reference to the datasource opened by rasterio + coordinates : :class:`podpac.Coordinates` + {coordinates} + band : int + The 'band' or index for the variable being accessed in files such as GeoTIFFs. Use None for all bounds. + crs : str, optional + The coordinate reference system. Normally this will come directly from the file, but this allows users to + specify the crs in case this information is missing from the file. 
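As a usage sketch of the reworked `PyDAP` node: coordinates must still be supplied by a child class, and the session hostname is parsed from the source URL by default. The OpenDAP URL and variable key below are hypothetical.

```python
from podpac.core.coordinates import Coordinates, clinspace
from podpac.core.data.pydap_source import PyDAP

class MyDAP(PyDAP):
    # PyDAP.get_coordinates raises NotImplementedError, so a child class supplies them
    def get_coordinates(self):
        return Coordinates([clinspace(-90, 90, 181), clinspace(-180, 180, 361)], dims=["lat", "lon"])

node = MyDAP(source="https://server.example/opendap/dataset.nc", data_key="soil_moisture")
print(node.hostname)  # "server.example", parsed from the source by the default above
# node.eval(node.coordinates) would open the OpenDAP dataset and read data_key
```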
+ """ + + # dataset = tl.Instance(rasterio.DatasetReader).tag(readonly=True) + band = tl.CInt(allow_none=True).tag(attr=True) + crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + driver = tl.Unicode(allow_none=True, default_value=None) + + @tl.default("band") + def _band_default(self): + if self.outputs is not None and self.output is not None: + return self.outputs.index(self.output) + elif self.outputs is None: + return 1 + else: + return None # All bands + + # ------------------------------------------------------------------------- + # public api methods + # ------------------------------------------------------------------------- + + @cached_property + def nan_vals(self): + return list(self.dataset.nodatavals) + + def open_dataset(self, fp, **kwargs): + with rasterio.MemoryFile() as mf: + mf.write(fp.read()) + return mf.open(driver=self.driver) + + def close_dataset(self): + """Closes the file for the datasource + """ + self.dataset.close() + + @common_doc(COMMON_DATA_DOC) + def get_coordinates(self): + """{get_coordinates} + + The default implementation tries to find the lat/lon coordinates based on dataset.affine. + It cannot determine the alt or time dimensions, so child classes may + have to overload this method. + """ + + # check to see if the coordinates are rotated used affine + affine = self.dataset.transform + + if self.crs is not None: + crs = self.crs + elif isinstance(self.dataset.crs, rasterio.crs.CRS) and "init" in self.dataset.crs: + crs = self.dataset.crs["init"].upper() + elif isinstance(self.dataset.crs, dict) and "init" in self.dataset.crs: + crs = self.dataset.crs["init"].upper() + else: + try: + crs = pyproj.CRS(self.dataset.crs).to_wkt() + except pyproj.exceptions.CRSError: + raise RuntimeError("Unexpected rasterio crs '%s'" % self.dataset.crs) + + return Coordinates.from_geotransform(affine.to_gdal(), self.dataset.shape, crs) + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + data = self.create_output_array(coordinates) + slc = coordinates_index + + # read data within coordinates_index window + window = ((slc[0].start, slc[0].stop), (slc[1].start, slc[1].stop)) + + if self.outputs is not None: # read all the bands + raster_data = self.dataset.read(out_shape=(len(self.outputs),) + tuple(coordinates.shape), window=window) + raster_data = np.moveaxis(raster_data, 0, 2) + else: # read the requested band + raster_data = self.dataset.read(self.band, out_shape=tuple(coordinates.shape), window=window) + + # set raster data to output array + data.data.ravel()[:] = raster_data.ravel() + return data + + # ------------------------------------------------------------------------- + # additional methods and properties + # ------------------------------------------------------------------------- + + @property + def band_count(self): + """The number of bands""" + + return self.dataset.count + + @cached_property + def band_descriptions(self): + """ A description of each band contained in dataset.tags + + Returns + ------- + OrderedDict + Dictionary of band_number: band_description pairs. 
The band_description values are a dictionary, each + containing a number of keys -- depending on the metadata + """ + + return OrderedDict((i, self.dataset.tags(i + 1)) for i in range(self.band_count)) + + @cached_property + def band_keys(self): + """An alternative view of band_descriptions based on the keys present in the metadata + + Returns + ------- + dict + Dictionary of metadata keys, where the values are the value of the key for each band. + For example, band_keys['TIME'] = ['2015', '2016', '2017'] for a dataset with three bands. + """ + + keys = {k for i in range(self.band_count) for k in self.band_descriptions[i]} # set + return {k: [self.band_descriptions[i].get(k) for i in range(self.band_count)] for k in keys} + + def get_band_numbers(self, key, value): + """Return the bands that have a key equal to a specified value. + + Parameters + ---------- + key : str / list + Key present in the metadata of the band. Can be a single key, or a list of keys. + value : str / list + Value of the key that should be returned. Can be a single value, or a list of values + + Returns + ------- + np.ndarray + An array of band numbers that match the criteria + """ + if not hasattr(key, "__iter__") or isinstance(key, string_types): + key = [key] + + if not hasattr(value, "__iter__") or isinstance(value, string_types): + value = [value] + + match = np.ones(self.band_count, bool) + for k, v in zip(key, value): + match = match & (np.array(self.band_keys[k]) == v) + matches = np.where(match)[0] + 1 + + return matches diff --git a/podpac/core/data/reprojection.py b/podpac/core/data/reprojection.py index df1bd3252..f835c07e6 100644 --- a/podpac/core/data/reprojection.py +++ b/podpac/core/data/reprojection.py @@ -1,14 +1,18 @@ from __future__ import division, unicode_literals, print_function, absolute_import -from six import string_types +import logging +import copy +from six import string_types import traitlets as tl -from podpac.core.utils import common_doc, NodeTrait +from podpac.core.utils import common_doc, NodeTrait, cached_property from podpac.core.coordinates import Coordinates from podpac.core.node import Node +from podpac.core.interpolation.interpolation import InterpolationTrait from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource -from podpac.core.data.interpolation import interpolation_trait + +_logger = logging.getLogger(__name__) class ReprojectedSource(DataSource): @@ -21,16 +25,17 @@ class ReprojectedSource(DataSource): The source node source_interpolation : str Type of interpolation method to use for the source node - reprojected_coordinates : Coordinates + reprojected_coordinates : :class:`podpac.Coordinates` Coordinates where the source node should be evaluated. 
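A short, hedged example of the band-metadata helpers defined above; the GeoTIFF path and its 'TIME' tags are hypothetical.

```python
from podpac.core.data.rasterio_source import Rasterio

node = Rasterio(source="stack.tif")
print(node.band_count)                        # number of bands in the file
print(node.band_keys.get("TIME"))             # e.g. ['2015', '2016', '2017'], one entry per band
print(node.get_band_numbers("TIME", "2016"))  # 1-based band numbers whose TIME tag equals '2016'
```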
""" - source = NodeTrait().tag(readonly=True) - - # node attrs - source_interpolation = interpolation_trait().tag(attr=True) + source = NodeTrait().tag(attr=True) + source_interpolation = InterpolationTrait().tag(attr=True) reprojected_coordinates = tl.Instance(Coordinates).tag(attr=True) + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["source", "interpolation"] + def _first_init(self, **kwargs): if "reprojected_coordinates" in kwargs: if isinstance(kwargs["reprojected_coordinates"], dict): @@ -40,34 +45,49 @@ def _first_init(self, **kwargs): return super(ReprojectedSource, self)._first_init(**kwargs) + @cached_property + def eval_source(self): + if self.source_interpolation is not None and not self.source.has_trait("interpolation"): + _logger.warning( + "ReprojectedSource cannot set the 'source_interpolation'" + " since 'source' does not have an 'interpolation' " + " trait. \n type(source): %s\nsource: %s" % (str(type(self.source)), str(self.source)) + ) + + source = self.source + if ( + self.source_interpolation is not None + and self.source.has_trait("interpolation") + and self.source_interpolation != self.source.interpolation + ): + source = copy.deepcopy(source) + source.set_trait("interpolation", self.source_interpolation) + + return source + @common_doc(COMMON_DATA_DOC) - def get_native_coordinates(self): - """{get_native_coordinates} + def get_coordinates(self): + """{get_coordinates} """ - if isinstance(self.source, DataSource): - sc = self.source.native_coordinates - else: # Otherwise we cannot guarantee that native_coordinates exist - sc = self.reprojected_coordinates + + # cannot guarantee that coordinates exist + if not isinstance(self.source, DataSource): + return self.reprojected_coordinates + + sc = self.source.coordinates rc = self.reprojected_coordinates - coords = [rc[dim] if dim in rc.dims else sc[dim] for dim in sc.dims] - return Coordinates(coords) + return Coordinates( + [rc[dim] if dim in rc.dims else self.source.coordinates[dim] for dim in self.source.coordinates.dims], + validate_crs=False, + ) @common_doc(COMMON_DATA_DOC) def get_data(self, coordinates, coordinates_index): """{get_data} """ - if hasattr(self.source, "interpolation") and self.source_interpolation is not None: - si = self.source.interpolation - self.source.interpolation = self.source_interpolation - elif self.source_interpolation is not None: - _logger.warning( - "ReprojectedSource cannot set the 'source_interpolation'" - " since self.source does not have an 'interpolation' " - " attribute. 
\n type(self.source): %s\nself.source: " % (str(type(self.source)), str(self.source)) - ) - data = self.source.eval(coordinates) - if hasattr(self.source, "interpolation") and self.source_interpolation is not None: - self.source.interpolation = si + + data = self.eval_source.eval(coordinates) + # The following is needed in case the source is an algorithm # or compositor node that doesn't have all the dimensions of # the reprojected coordinates @@ -79,11 +99,4 @@ def get_data(self, coordinates, coordinates_index): @property def base_ref(self): - """Summary - - Returns - ------- - TYPE - Description - """ return "{}_reprojected".format(self.source.base_ref) diff --git a/podpac/core/data/test/assets/points-no-data.csv b/podpac/core/data/test/assets/points-no-data.csv new file mode 100644 index 000000000..b15cf818a --- /dev/null +++ b/podpac/core/data/test/assets/points-no-data.csv @@ -0,0 +1,6 @@ +lat,lon,time,altitude +0,0,2018-01-01T12:00:00,0 +1,0,2018-01-01T12:00:00,0 +1,2,2018-01-01T12:00:00,0 +1,2,2018-01-01T12:00:03,0 +1,2,2018-01-01T12:00:03,4 \ No newline at end of file diff --git a/podpac/core/data/test/assets/points-one-dim.csv b/podpac/core/data/test/assets/points-one-dim.csv new file mode 100644 index 000000000..eb463c791 --- /dev/null +++ b/podpac/core/data/test/assets/points-one-dim.csv @@ -0,0 +1,6 @@ +time,data +2018-01-01T12:00:00,0 +2018-01-02T12:00:00,1 +2018-01-03T12:00:00,2 +2018-01-04T12:00:03,3 +2018-01-05T12:00:03,4 \ No newline at end of file diff --git a/podpac/core/data/test/test_array.py b/podpac/core/data/test/test_array.py index 87eeb6184..52a320578 100644 --- a/podpac/core/data/test/test_array.py +++ b/podpac/core/data/test/test_array.py @@ -1,4 +1,5 @@ import numpy as np +import traitlets as tl import pytest from podpac.core.coordinates import Coordinates, clinspace @@ -13,33 +14,30 @@ class TestArray(object): data = np.random.rand(11, 11) coordinates = Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) - def test_source_trait(self): - """ must be an ndarray """ - - node = Array(source=self.data, native_coordinates=self.coordinates) + def test_data_array(self): + node = Array(source=self.data, coordinates=self.coordinates) + def test_data_list(self): # list is coercable to array - node = Array(source=[0, 1, 1], native_coordinates=self.coordinates) + node = Array(source=[0, 1, 1], coordinates=self.coordinates) - # this list is not coercable to array - # Starting with numpy 0.16, this is now allowed! 
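The tests above exercise the renamed `coordinates` argument; a minimal sketch (assuming the public `podpac.data.Array` export) looks like this.

```python
import numpy as np
from podpac.core.coordinates import Coordinates, clinspace
from podpac.data import Array

coords = Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"])
node = Array(source=np.random.rand(11, 11), coordinates=coords)
output = node.eval(coords)  # UnitsDataArray on the requested lat/lon grid
```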
- # with pytest.raises(TraitError): - # node = Array(source=[0, [0, 1]], native_coordinates=self.coordinates) + def test_invalid_data(self): + with pytest.raises(ValueError, match="Array 'source' data must be numerical"): + node = Array(source=["a", "b"], coordinates=self.coordinates) def test_get_data(self): """ defined get_data function""" - source = self.data - node = Array(source=source, native_coordinates=self.coordinates) + node = Array(source=self.data, coordinates=self.coordinates) output = node.eval(self.coordinates) assert isinstance(output, UnitsDataArray) - assert output.values[0, 0] == source[0, 0] - assert output.values[4, 5] == source[4, 5] + assert output.values[0, 0] == self.data[0, 0] + assert output.values[4, 5] == self.data[4, 5] def test_get_data_multiple(self): data = np.random.rand(11, 11, 2) - node = Array(source=data, native_coordinates=self.coordinates, outputs=["a", "b"]) + node = Array(source=data, coordinates=self.coordinates, outputs=["a", "b"]) output = node.eval(self.coordinates) assert isinstance(output, UnitsDataArray) assert output.dims == ("lat", "lon", "output") @@ -47,43 +45,20 @@ def test_get_data_multiple(self): np.testing.assert_array_equal(output.sel(output="a"), data[:, :, 0]) np.testing.assert_array_equal(output.sel(output="b"), data[:, :, 1]) - node = Array(source=data, native_coordinates=self.coordinates, outputs=["a", "b"], output="b") + node = Array(source=data, coordinates=self.coordinates, outputs=["a", "b"], output="b") output = node.eval(self.coordinates) assert isinstance(output, UnitsDataArray) assert output.dims == ("lat", "lon") np.testing.assert_array_equal(output, data[:, :, 1]) - def test_native_coordinates(self): - """test that native coordinates get defined""" + def test_coordinates(self): + node = Array(source=self.data, coordinates=self.coordinates) + assert node.coordinates node = Array(source=self.data) - with pytest.raises(NotImplementedError): - node.get_native_coordinates() - - node = Array(source=self.data, native_coordinates=self.coordinates) - assert node.native_coordinates - - node = Array(source=self.data, native_coordinates=self.coordinates) - native_coordinates = node.native_coordinates - get_native_coordinates = node.get_native_coordinates() - assert native_coordinates - assert get_native_coordinates - assert native_coordinates == get_native_coordinates - - def test_base_definition(self): - node = Array(source=self.data, native_coordinates=self.coordinates) - d = node.base_definition - source = np.array(d["source"]) - np.testing.assert_array_equal(source, self.data) - - def test_definition(self): - node = Array(source=self.data, native_coordinates=self.coordinates) - node2 = Node.from_definition(node.definition) - assert isinstance(node2, Array) - np.testing.assert_array_equal(node2.source, self.data) + with pytest.raises(tl.TraitError): + node.coordinates - def test_json(self): - node = Array(source=self.data, native_coordinates=self.coordinates) - node2 = Node.from_json(node.json) - assert isinstance(node2, Array) - np.testing.assert_array_equal(node2.source, self.data) + def test_no_cache(self): + node = Array() + assert len(node.cache_ctrl._cache_stores) == 0 diff --git a/podpac/core/data/test/test_base_dataset_source.py b/podpac/core/data/test/test_base_dataset_source.py deleted file mode 100644 index a81ae6ece..000000000 --- a/podpac/core/data/test/test_base_dataset_source.py +++ /dev/null @@ -1,209 +0,0 @@ -import numpy as np -import pytest - -from podpac.core.data.file import DatasetSource - -LAT = [0, 1, 
2] -LON = [10, 20] -TIME = [100, 200] -ALT = [1, 2, 3, 4] -DATA = np.arange(48).reshape((3, 2, 2, 4)) -OTHER = 2 * np.arange(48).reshape((3, 2, 2, 4)) - - -class MockDatasetSourceSingle(DatasetSource): - source = "mock-single" - dataset = {"lat": LAT, "lon": LON, "time": TIME, "alt": ALT, "data": DATA} - dims = ["lat", "lon", "time", "alt"] - available_keys = ["data"] - - -class MockDatasetSourceMultiple(DatasetSource): - source = "mock-multiple" - dataset = {"lat": LAT, "lon": LON, "time": TIME, "alt": ALT, "data": DATA, "other": OTHER} - dims = ["lat", "lon", "time", "alt"] - available_keys = ["data", "other"] - - -class TestDatasetSource(object): - def test_not_implemented(self): - with pytest.raises(NotImplementedError): - node = DatasetSource() - - def test_init(self): - node = MockDatasetSourceSingle() - node = MockDatasetSourceMultiple() - - def test_close(self): - node = MockDatasetSourceSingle() - node.close_dataset() - - def test_data_key_and_output_keys(self): - # cannot both be defined - with pytest.raises(TypeError, match=".* cannot have both"): - node = MockDatasetSourceSingle(data_key="data", output_keys=["data"]) - - # make a standard single-output node for datasets with a single non-dimension key - node = MockDatasetSourceSingle() - assert node.data_key == "data" - assert node.output_keys is None - assert node.outputs is None - - # make a multi-output node for datasets with multiple non-dimension keys - node = MockDatasetSourceMultiple() - assert node.data_key is None - assert node.output_keys == ["data", "other"] - assert node.outputs == ["data", "other"] - - def test_outputs(self): - # standard single-output nodes have no "outputs" - node = MockDatasetSourceSingle(data_key="data") - assert node.outputs == None - - node = MockDatasetSourceMultiple(data_key="data") - assert node.outputs == None - - # for multi-output nodes, use the dataset's keys (output_keys) by default - node = MockDatasetSourceSingle(output_keys=["data"]) - assert node.outputs == ["data"] - - node = MockDatasetSourceMultiple(output_keys=["data", "other"]) - assert node.outputs == ["data", "other"] - - node = MockDatasetSourceMultiple(output_keys=["data"]) - assert node.outputs == ["data"] - - # alternate outputs names can be specified - node = MockDatasetSourceSingle(output_keys=["data"], outputs=["a"]) - assert node.outputs == ["a"] - - node = MockDatasetSourceMultiple(output_keys=["data", "other"], outputs=["a", "b"]) - assert node.outputs == ["a", "b"] - - node = MockDatasetSourceMultiple(output_keys=["data"], outputs=["a"]) - assert node.outputs == ["a"] - - node = MockDatasetSourceMultiple(outputs=["a", "b"]) - assert node.outputs == ["a", "b"] - - # but the outputs and output_keys must match - with pytest.raises(ValueError, match="outputs and output_keys size mismatch"): - node = MockDatasetSourceMultiple(output_keys=["data"], outputs=["a", "b"]) - - with pytest.raises(ValueError, match="outputs and output_keys size mismatch"): - node = MockDatasetSourceMultiple(output_keys=["data", "other"], outputs=["a"]) - - with pytest.raises(ValueError, match="outputs and output_keys size mismatch"): - node = MockDatasetSourceMultiple(outputs=["a"]) - - # and outputs cannot be provided for single-output nodes - with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): - node = MockDatasetSourceSingle(data_key="data", outputs=["a"]) - - with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): - node = MockDatasetSourceMultiple(data_key="data", 
outputs=["a"]) - - with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): - node = MockDatasetSourceSingle(outputs=["a"]) - - def test_output(self): - with pytest.raises(TypeError, match="Invalid output"): - node = MockDatasetSourceSingle(data_key="data", output="data") - - with pytest.raises(ValueError, match="Invalid output"): - node = MockDatasetSourceMultiple(outputs=["a", "b"], output="other") - - def test_native_coordinates(self): - node = MockDatasetSourceSingle() - nc = node.native_coordinates - assert nc.dims == ("lat", "lon", "time", "alt") - np.testing.assert_array_equal(nc["lat"].coordinates, LAT) - np.testing.assert_array_equal(nc["lon"].coordinates, LON) - np.testing.assert_array_equal(nc["time"].coordinates, TIME) - np.testing.assert_array_equal(nc["alt"].coordinates, ALT) - - def test_base_definition(self): - node = MockDatasetSourceSingle() - d = node.base_definition - assert "attrs" in d - assert "lat_key" in d["attrs"] - assert "lon_key" in d["attrs"] - assert "alt_key" in d["attrs"] - assert "time_key" in d["attrs"] - assert "data_key" in d["attrs"] - assert "output_keys" not in d["attrs"] - assert "outputs" not in d["attrs"] - assert "cf_time" not in d["attrs"] - assert "cf_units" not in d["attrs"] - assert "cf_calendar" not in d["attrs"] - assert "crs" not in d["attrs"] - - node = MockDatasetSourceMultiple() - d = node.base_definition - assert "attrs" in d - assert "lat_key" in d["attrs"] - assert "lon_key" in d["attrs"] - assert "alt_key" in d["attrs"] - assert "time_key" in d["attrs"] - assert "output_keys" in d["attrs"] - assert "data_key" not in d["attrs"] - assert "outputs" not in d["attrs"] - assert "cf_time" not in d["attrs"] - assert "cf_units" not in d["attrs"] - assert "cf_calendar" not in d["attrs"] - assert "crs" not in d["attrs"] - - node = MockDatasetSourceMultiple(outputs=["a", "b"]) - d = node.base_definition - assert "attrs" in d - assert "lat_key" in d["attrs"] - assert "lon_key" in d["attrs"] - assert "alt_key" in d["attrs"] - assert "time_key" in d["attrs"] - assert "output_keys" in d["attrs"] - assert "outputs" in d["attrs"] - assert "data_key" not in d["attrs"] - assert "cf_time" not in d["attrs"] - assert "cf_units" not in d["attrs"] - assert "cf_calendar" not in d["attrs"] - assert "crs" not in d["attrs"] - - class MockDatasetSource1(DatasetSource): - source = "temp" - dims = ["lat", "lon"] - available_keys = ["data"] - - node = MockDatasetSource1(crs="EPSG::3294") - d = node.base_definition - assert "attrs" in d - assert "lat_key" in d["attrs"] - assert "lon_key" in d["attrs"] - assert "data_key" in d["attrs"] - assert "crs" in d["attrs"] - assert "time_key" not in d["attrs"] - assert "alt_key" not in d["attrs"] - assert "output_keys" not in d["attrs"] - assert "outputs" not in d["attrs"] - assert "cf_time" not in d["attrs"] - assert "cf_units" not in d["attrs"] - assert "cf_calendar" not in d["attrs"] - - class MockDatasetSource2(DatasetSource): - source = "temp" - dims = ["time", "alt"] - available_keys = ["data"] - - node = MockDatasetSource2(cf_time=True) - d = node.base_definition - assert "attrs" in d - assert "alt_key" in d["attrs"] - assert "time_key" in d["attrs"] - assert "data_key" in d["attrs"] - assert "cf_time" in d["attrs"] - assert "cf_units" in d["attrs"] - assert "cf_calendar" in d["attrs"] - assert "crs" not in d["attrs"] - assert "lat_key" not in d["attrs"] - assert "lon_key" not in d["attrs"] - assert "output_keys" not in d["attrs"] - assert "outputs" not in d["attrs"] diff --git 
a/podpac/core/data/test/test_csv.py b/podpac/core/data/test/test_csv.py index b8d6026bc..ba6b56bb3 100644 --- a/podpac/core/data/test/test_csv.py +++ b/podpac/core/data/test/test_csv.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from podpac.core.data.file import CSV +from podpac.core.data.csv_source import CSV class TestCSV(object): @@ -13,6 +13,8 @@ class TestCSV(object): source_single = os.path.join(os.path.dirname(__file__), "assets/points-single.csv") source_multiple = os.path.join(os.path.dirname(__file__), "assets/points-multiple.csv") source_no_header = os.path.join(os.path.dirname(__file__), "assets/points-no-header.csv") + source_one_dim = os.path.join(os.path.dirname(__file__), "assets/points-one-dim.csv") + source_no_data = os.path.join(os.path.dirname(__file__), "assets/points-no-data.csv") lat = [0, 1, 1, 1, 1] lon = [0, 0, 2, 2, 2] @@ -30,68 +32,146 @@ class TestCSV(object): data = [0, 1, 2, 3, 4] other = [10.5, 20.5, 30.5, 40.5, 50.5] - # def test_init(self): - # node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + def test_init(self): + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + + def test_close(self): + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + + def test_get_dims(self): + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.dims == ["lat", "lon", "time", "alt"] + + node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.dims == ["lat", "lon", "time", "alt"] - # def test_close(self): - # node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - # node.close_dataset() + def test_available_data_keys(self): + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.available_data_keys == ["data"] + + node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.available_data_keys == ["data", "other"] + + node = CSV(source=self.source_no_data, alt_key="altitude", crs="+proj=merc +vunits=m") + with pytest.raises(ValueError, match="No data keys found"): + node.available_data_keys + + def test_data_key(self): + # default + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == "data" + + # specify + node = CSV(source=self.source_single, data_key="data", alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == "data" + + # invalid + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_single, data_key="misc", alt_key="altitude", crs="+proj=merc +vunits=m") + + def test_data_key_col(self): + # specify column + node = CSV(source=self.source_single, data_key=4, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == 4 + + # invalid (out of range) + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_single, data_key=5, alt_key="altitude", crs="+proj=merc +vunits=m") + + # invalid (dimension key) + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_single, data_key=0, alt_key="altitude", crs="+proj=merc +vunits=m") + + def test_data_key_multiple_outputs(self): + # default + node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == ["data", "other"] + + # specify multiple + node = CSV( + 
source=self.source_multiple, data_key=["other", "data"], alt_key="altitude", crs="+proj=merc +vunits=m" + ) + assert node.data_key == ["other", "data"] - # def test_get_dims(self): - # node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - # assert node.dims == ["lat", "lon", "time", "alt"] + # specify one + node = CSV(source=self.source_multiple, data_key="other", alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == "other" - # node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") - # assert node.dims == ["lat", "lon", "time", "alt"] + # specify multiple: invalid item + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV( + source=self.source_multiple, data_key=["data", "misc"], alt_key="altitude", crs="+proj=merc +vunits=m" + ) - # def test_available_keys(self): - # node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - # assert node.available_keys == ["data"] + # specify one: invalid + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_multiple, data_key="misc", alt_key="altitude", crs="+proj=merc +vunits=m") - # node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") - # assert node.available_keys == ["data", "other"] + def test_data_key_col_multiple_outputs(self): + # specify multiple + node = CSV(source=self.source_multiple, data_key=[4, 5], alt_key="altitude", crs="+proj=merc +vunits=m") + assert node.data_key == [4, 5] + assert node.outputs == ["data", "other"] - # def test_native_coordinates(self): - # node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - # nc = node.native_coordinates - # assert nc.dims == ("lat_lon_time_alt",) - # np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) - # np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) - # np.testing.assert_array_equal(nc["time"].coordinates, self.time) - # np.testing.assert_array_equal(nc["alt"].coordinates, self.alt) + # specify one + node = CSV(source=self.source_multiple, data_key=4, alt_key="altitude", crs="+proj=merc +vunits=m") + + assert node.data_key == 4 + assert node.outputs is None + + # specify multiple: invalid item + with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_multiple, data_key=[4, 6], alt_key="altitude", crs="+proj=merc +vunits=m") + + # specify one: invalid with pytest.raises(ValueError, match="Invalid data_key"): + node = CSV(source=self.source_multiple, data_key=6, alt_key="altitude", crs="+proj=merc +vunits=m") + + def test_coordinates(self): + node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") + nc = node.coordinates + assert nc.dims == ("lat_lon_time_alt",) + np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) + np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) + np.testing.assert_array_equal(nc["time"].coordinates, self.time) + np.testing.assert_array_equal(nc["alt"].coordinates, self.alt) + + # one dim (unstacked) + node = CSV(source=self.source_one_dim) + nc = node.coordinates + assert nc.dims == ("time",) def test_get_data(self): node = CSV(source=self.source_single, alt_key="altitude", data_key="data", crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.data) node = CSV(source=self.source_multiple, alt_key="altitude", data_key="data", 
crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.data) node = CSV(source=self.source_multiple, alt_key="altitude", data_key="other", crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.other) # default node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.data) def test_get_data_multiple(self): # multiple data keys node = CSV( - source=self.source_multiple, alt_key="altitude", output_keys=["data", "other"], crs="+proj=merc +vunits=m" + source=self.source_multiple, alt_key="altitude", data_key=["data", "other"], crs="+proj=merc +vunits=m" ) - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) assert out.dims == ("lat_lon_time_alt", "output") np.testing.assert_array_equal(out["output"], ["data", "other"]) np.testing.assert_array_equal(out.sel(output="data"), self.data) np.testing.assert_array_equal(out.sel(output="other"), self.other) # single data key - node = CSV(source=self.source_multiple, alt_key="altitude", output_keys=["data"], crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + node = CSV(source=self.source_multiple, alt_key="altitude", data_key=["data"], crs="+proj=merc +vunits=m") + out = node.eval(node.coordinates) assert out.dims == ("lat_lon_time_alt", "output") np.testing.assert_array_equal(out["output"], ["data"]) np.testing.assert_array_equal(out.sel(output="data"), self.data) @@ -100,11 +180,11 @@ def test_get_data_multiple(self): node = CSV( source=self.source_multiple, alt_key="altitude", - output_keys=["data", "other"], + data_key=["data", "other"], outputs=["a", "b"], crs="+proj=merc +vunits=m", ) - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) assert out.dims == ("lat_lon_time_alt", "output") np.testing.assert_array_equal(out["output"], ["a", "b"]) np.testing.assert_array_equal(out.sel(output="a"), self.data) @@ -112,7 +192,7 @@ def test_get_data_multiple(self): # default node = CSV(source=self.source_multiple, alt_key="altitude", crs="+proj=merc +vunits=m") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) assert out.dims == ("lat_lon_time_alt", "output") np.testing.assert_array_equal(out["output"], ["data", "other"]) np.testing.assert_array_equal(out.sel(output="data"), self.data) @@ -129,8 +209,8 @@ def test_cols(self): crs="+proj=merc +vunits=m", ) - # native_coordinates - nc = node.native_coordinates + # coordinates + nc = node.coordinates assert nc.dims == ("lat_lon_time_alt",) np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) @@ -148,13 +228,13 @@ def test_cols_multiple(self): lon_key=1, time_key=2, alt_key=3, - output_keys=[4, 5], + data_key=[4, 5], outputs=["a", "b"], crs="+proj=merc +vunits=m", ) # native coordinantes - nc = node.native_coordinates + nc = node.coordinates assert nc.dims == ("lat_lon_time_alt",) np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) @@ -181,7 +261,7 @@ def test_header(self): ) # native coordinantes - nc = node.native_coordinates + nc = node.coordinates assert nc.dims == ("lat_lon_time_alt",) 
np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) @@ -191,23 +271,3 @@ def test_header(self): # eval out = node.eval(nc) np.testing.assert_array_equal(out, self.data) - - def test_base_definition(self): - node = CSV(source=self.source_single, alt_key="altitude", crs="+proj=merc +vunits=m") - d = node.base_definition - if "attrs" in d: - assert "header" not in d["attrs"] - - node = CSV( - source=self.source_no_header, - lat_key=0, - lon_key=1, - time_key=2, - alt_key=3, - data_key=4, - header=None, - crs="+proj=merc +vunits=m", - ) - d = node.base_definition - assert "attrs" in d - assert "header" in d["attrs"] diff --git a/podpac/core/data/test/test_dataset.py b/podpac/core/data/test/test_dataset.py index 53cf82847..2b5f985ce 100644 --- a/podpac/core/data/test/test_dataset.py +++ b/podpac/core/data/test/test_dataset.py @@ -3,7 +3,7 @@ import pytest -from podpac.core.data.file import Dataset +from podpac.core.data.dataset_source import Dataset class TestDataset(object): @@ -26,17 +26,18 @@ def test_dims(self): assert node.dims == ["time", "lat", "lon"] # un-mapped keys - with pytest.raises(ValueError, match="Unexpected dimension"): - node = Dataset(source=self.source) + # node = Dataset(source=self.source) + # with pytest.raises(ValueError, match="Unexpected dimension"): + # node.dims - def test_available_keys(self): + def test_available_data_keys(self): node = Dataset(source=self.source, time_key="day") - assert node.available_keys == ["data", "other"] + assert node.available_data_keys == ["data", "other"] - def test_native_coordinates(self): + def test_coordinates(self): # specify dimension keys node = Dataset(source=self.source, time_key="day") - nc = node.native_coordinates + nc = node.coordinates assert nc.dims == ("time", "lat", "lon") np.testing.assert_array_equal(nc["lat"].coordinates, self.lat) np.testing.assert_array_equal(nc["lon"].coordinates, self.lon) @@ -46,18 +47,18 @@ def test_native_coordinates(self): def test_get_data(self): # specify data key node = Dataset(source=self.source, time_key="day", data_key="data") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.data) node.close_dataset() node = Dataset(source=self.source, time_key="day", data_key="other") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) np.testing.assert_array_equal(out, self.other) node.close_dataset() - def test_get_data_multilpe(self): - node = Dataset(source=self.source, time_key="day", output_keys=["data", "other"]) - out = node.eval(node.native_coordinates) + def test_get_data_multiple(self): + node = Dataset(source=self.source, time_key="day", data_key=["data", "other"]) + out = node.eval(node.coordinates) assert out.dims == ("time", "lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["data", "other"]) np.testing.assert_array_equal(out.sel(output="data"), self.data) @@ -65,16 +66,16 @@ def test_get_data_multilpe(self): node.close_dataset() # single - node = Dataset(source=self.source, time_key="day", output_keys=["other"]) - out = node.eval(node.native_coordinates) + node = Dataset(source=self.source, time_key="day", data_key=["other"]) + out = node.eval(node.coordinates) assert out.dims == ("time", "lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["other"]) np.testing.assert_array_equal(out.sel(output="other"), self.other) node.close_dataset() # alternate output names - node = 
Dataset(source=self.source, time_key="day", output_keys=["data", "other"], outputs=["a", "b"]) - out = node.eval(node.native_coordinates) + node = Dataset(source=self.source, time_key="day", data_key=["data", "other"], outputs=["a", "b"]) + out = node.eval(node.coordinates) assert out.dims == ("time", "lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["a", "b"]) np.testing.assert_array_equal(out.sel(output="a"), self.data) @@ -83,7 +84,7 @@ def test_get_data_multilpe(self): # default node = Dataset(source=self.source, time_key="day") - out = node.eval(node.native_coordinates) + out = node.eval(node.coordinates) assert out.dims == ("time", "lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["data", "other"]) np.testing.assert_array_equal(out.sel(output="data"), self.data) @@ -91,20 +92,8 @@ def test_get_data_multilpe(self): node.close_dataset() def test_extra_dim(self): - # TODO - pass - - def test_base_definition(self): - node = Dataset(source=self.source, time_key="day") - d = node.base_definition - if "attrs" in d: - assert "extra_dim" not in d["attrs"] - node.close_dataset() + # default + node = Dataset(source=self.source) + assert node.extra_dim is None - node = Dataset( - source=self.source, time_key="day", extra_dim={"channel": 1} - ) # TODO actually use source with extra_dim - d = node.base_definition - assert "attrs" in d - assert "extra_dim" in d["attrs"] - node.close_dataset() + # TODO diff --git a/podpac/core/data/test/test_datasource.py b/podpac/core/data/test/test_datasource.py index 50d5205c1..fc68daeda 100644 --- a/podpac/core/data/test/test_datasource.py +++ b/podpac/core/data/test/test_datasource.py @@ -15,46 +15,33 @@ from podpac.core.node import COMMON_NODE_DOC, NodeException from podpac.core.style import Style from podpac.core.coordinates import Coordinates, clinspace, crange +from podpac.core.interpolation.interpolation import Interpolation, Interpolator +from podpac.core.interpolation.interpolator import Interpolator from podpac.core.data.datasource import DataSource, COMMON_DATA_DOC, DATA_DOC -from podpac.core.data.interpolation import Interpolation -from podpac.core.data.interpolator import Interpolator - - -class MockArrayDataSource(DataSource): - def get_data(self, coordinates, coordinates_index): - return self.create_output_array(coordinates, data=self.source[coordinates_index]) class MockDataSource(DataSource): - data = np.ones((101, 101)) + data = np.ones((11, 11)) data[0, 0] = 10 data[0, 1] = 1 data[1, 0] = 5 data[1, 1] = None - def get_native_coordinates(self): - return Coordinates([clinspace(-25, 25, 101), clinspace(-25, 25, 101)], dims=["lat", "lon"]) + def get_coordinates(self): + return Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) def get_data(self, coordinates, coordinates_index): return self.create_output_array(coordinates, data=self.data[coordinates_index]) -class MockNonuniformDataSource(DataSource): - """ Mock Data Source for testing that is non-uniform """ +class MockDataSourceStacked(DataSource): + data = np.arange(11) - # mock 3 x 3 grid of random values - source = np.random.rand(3, 3) - native_coordinates = Coordinates([[-10, -2, -1], [4, 32, 1]], dims=["lat", "lon"]) - - def get_native_coordinates(self): - """ """ - return self.native_coordinates + def get_coordinates(self): + return Coordinates([clinspace((-25, -25), (25, 25), 11)], dims=["lat_lon"]) def get_data(self, coordinates, coordinates_index): - """ """ - s = coordinates_index - d = 
self.create_output_array(coordinates, data=self.source[s]) - return d + return self.create_output_array(coordinates, data=self.data[coordinates_index]) class TestDataDocs(object): @@ -78,116 +65,168 @@ class TestDataSource(object): def test_init(self): node = DataSource() - def test_nomethods_must_be_implemented(self): + def test_get_data_not_implemented(self): node = DataSource() with pytest.raises(NotImplementedError): - node.get_native_coordinates() + node.get_data(None, None) + def test_get_coordinates_not_implemented(self): + node = DataSource() with pytest.raises(NotImplementedError): - node.get_data(None, None) + node.get_coordinates() - def test_set_native_coordinates(self): - nc = Coordinates([clinspace(0, 50, 101), clinspace(0, 50, 101)], dims=["lat", "lon"]) - node = DataSource(source="test", native_coordinates=nc) - assert node.native_coordinates is not None + def test_coordinates(self): + # not implemented + node = DataSource() + with pytest.raises(NotImplementedError): + node.coordinates - with pytest.raises(tl.TraitError): - DataSource(source="test", native_coordinates="not a coordinate") + # use get_coordinates (once) + class MyDataSource(DataSource): + get_coordinates_called = 0 - with pytest.raises(NotImplementedError): - DataSource(source="test").native_coordinates + def get_coordinates(self): + self.get_coordinates_called += 1 + return Coordinates([]) + + node = MyDataSource() + assert node.get_coordinates_called == 0 + assert isinstance(node.coordinates, Coordinates) + assert node.get_coordinates_called == 1 + assert isinstance(node.coordinates, Coordinates) + assert node.get_coordinates_called == 1 + + # can't set + with pytest.raises(AttributeError, match="can't set attribute"): + node.coordinates = Coordinates([]) + + def test_cache_coordinates(self): + class MyDataSource(DataSource): + get_coordinates_called = 0 + + def get_coordinates(self): + self.get_coordinates_called += 1 + return Coordinates([]) + + a = MyDataSource(cache_coordinates=True, cache_ctrl=["ram"]) + b = MyDataSource(cache_coordinates=True, cache_ctrl=["ram"]) + c = MyDataSource(cache_coordinates=False, cache_ctrl=["ram"]) + d = MyDataSource(cache_coordinates=True, cache_ctrl=[]) + + a.rem_cache("*") + b.rem_cache("*") + c.rem_cache("*") + d.rem_cache("*") + + # get_coordinates called once + assert not a.has_cache("coordinates") + assert a.get_coordinates_called == 0 + assert isinstance(a.coordinates, Coordinates) + assert a.get_coordinates_called == 1 + assert isinstance(a.coordinates, Coordinates) + assert a.get_coordinates_called == 1 + + # coordinates is cached to a, b, and c + assert a.has_cache("coordinates") + assert b.has_cache("coordinates") + assert c.has_cache("coordinates") + assert not d.has_cache("coordinates") + + # b: use cache, get_coordinates not called + assert b.get_coordinates_called == 0 + assert isinstance(b.coordinates, Coordinates) + assert b.get_coordinates_called == 0 + + # c: don't use cache, get_coordinates called + assert c.get_coordinates_called == 0 + assert isinstance(c.coordinates, Coordinates) + assert c.get_coordinates_called == 1 + + # d: use cache but there is no ram cache for this node, get_coordinates is called + assert d.get_coordinates_called == 0 + assert isinstance(d.coordinates, Coordinates) + assert d.get_coordinates_called == 1 + + def test_set_coordinates(self): + node = MockDataSource() + node.set_coordinates(Coordinates([])) + assert node.coordinates == Coordinates([]) + assert node.coordinates != node.get_coordinates() - def 
test_get_native_coordinates(self): - # get_native_coordinates should set the native_coordinates by default + # don't overwrite node = MockDataSource() - assert node.native_coordinates is not None - np.testing.assert_equal(node.native_coordinates["lat"].coordinates, np.linspace(-25, 25, 101)) - np.testing.assert_equal(node.native_coordinates["lon"].coordinates, np.linspace(-25, 25, 101)) + node.coordinates + node.set_coordinates(Coordinates([])) + assert node.coordinates != Coordinates([]) + assert node.coordinates == node.get_coordinates() + + def test_boundary(self): + # default + node = DataSource() + assert node.boundary == {} + + # none + node = DataSource(boundary={}) + + # centered + node = DataSource(boundary={"lat": 0.25, "lon": 2.0}) + node = DataSource(boundary={"time": "1,D"}) + + # box (not necessary centered) + with pytest.raises(NotImplementedError, match="Non-centered boundary not yet supported"): + node = DataSource(boundary={"lat": [-0.2, 0.3], "lon": [-2.0, 2.0]}) + + with pytest.raises(NotImplementedError, match="Non-centered boundary not yet supported"): + node = DataSource(boundary={"time": ["-1,D", "2,D"]}) + + # polygon + with pytest.raises(NotImplementedError, match="Non-centered boundary not yet supported"): + node = DataSource(boundary={"lat": [0.0, -0.5, 0.0, 0.5], "lon": [-0.5, 0.0, 0.5, 0.0]}) # diamond + + # array of boundaries (one for each coordinate) + with pytest.raises(NotImplementedError, match="Non-uniform boundary not yet supported"): + node = DataSource(boundary={"lat": [[-0.1, 0.4], [-0.2, 0.3], [-0.3, 0.2]], "lon": 0.5}) - # but don't call get_native_coordinates if the native_coordinates are set explicitly - nc = Coordinates([clinspace(0, 50, 101), clinspace(0, 50, 101)], dims=["lat", "lon"]) - node = MockDataSource(native_coordinates=nc) - assert node.native_coordinates is not None - np.testing.assert_equal(node.native_coordinates["lat"].coordinates, nc["lat"].coordinates) - np.testing.assert_equal(node.native_coordinates["lat"].coordinates, nc["lat"].coordinates) + with pytest.raises(NotImplementedError, match="Non-uniform boundary not yet supported"): + node = DataSource(boundary={"time": [["-1,D", "1,D"], ["-2,D", "1,D"]]}) + + # invalid + with pytest.raises(tl.TraitError): + node = DataSource(boundary=0.5) + + with pytest.raises(ValueError, match="Invalid dimension"): + node = DataSource(boundary={"other": 0.5}) + + with pytest.raises(TypeError, match="Invalid coordinate delta"): + node = DataSource(boundary={"lat": {}}) + + with pytest.raises(ValueError, match="Invalid boundary"): + node = DataSource(boundary={"lat": -0.25, "lon": 2.0}) # negative + + with pytest.raises(ValueError, match="Invalid boundary"): + node = DataSource(boundary={"time": "-2,D"}) # negative + + with pytest.raises(ValueError, match="Invalid boundary"): + node = DataSource(boundary={"time": "2018-01-01"}) # not a delta def test_invalid_interpolation(self): with pytest.raises(tl.TraitError): - MockDataSource(interpolation="myowninterp") + DataSource(interpolation="myowninterp") def test_invalid_nan_vals(self): with pytest.raises(tl.TraitError): - MockDataSource(nan_vals={}) + DataSource(nan_vals={}) with pytest.raises(tl.TraitError): - MockDataSource(nan_vals=10) - - def test_base_definition(self): - """Test definition property method""" - - # TODO: add interpolation definition testing - - node = DataSource(source="test") - d = node.base_definition - assert d - assert "node" in d - assert "source" in d - assert "lookup_source" not in d - assert "interpolation" in d - 
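# For reference, a minimal sketch of the renamed DataSource API exercised by the tests
# above: a subclass implements get_coordinates() and get_data(), and the read-only
# `coordinates` property calls get_coordinates() once and memoizes the result
# (see test_coordinates / test_cache_coordinates / test_set_coordinates). This is an
# illustration inferred from those assertions, not part of the patch; the class name
# and data values are assumptions.
import numpy as np
from podpac.core.coordinates import Coordinates, clinspace
from podpac.core.data.datasource import DataSource

class ExampleDataSource(DataSource):
    data = np.arange(121).reshape(11, 11)

    def get_coordinates(self):
        # called once; the result is exposed (and cached) as `self.coordinates`
        return Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"])

    def get_data(self, coordinates, coordinates_index):
        return self.create_output_array(coordinates, data=self.data[coordinates_index])

node = ExampleDataSource()
output = node.eval(node.coordinates)   # evaluate the node at its own coordinates
assert output.shape == (11, 11)

# `coordinates` cannot be assigned directly; set_coordinates() only takes effect
# if it is called before the first access memoizes get_coordinates()
seeded = ExampleDataSource()
seeded.set_coordinates(Coordinates([]))
assert seeded.coordinates == Coordinates([])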
assert d["source"] == node.source - if "attrs" in d: - assert "nan_vals" not in d["attrs"] - - # keep nan_vals - node = DataSource(source="test", nan_vals=[-999]) - d = node.base_definition - assert "attrs" in d - assert "nan_vals" in d["attrs"] - - # array source - node2 = DataSource(source=np.array([1, 2, 3])) - d = node2.base_definition - assert "source" in d - assert isinstance(d["source"], list) - assert d["source"] == [1, 2, 3] - - # lookup source - node3 = DataSource(source=node) - d = node3.base_definition - assert "source" not in d - assert "lookup_source" in d - - # cannot tag source or interpolation as attr - class MyDataSource1(DataSource): - source = tl.Unicode().tag(attr=True) - - node = MyDataSource1(source="test") - with pytest.raises(NodeException, match="The 'source' property cannot be tagged as an 'attr'"): - node.base_definition - - class MyDataSource2(DataSource): - interpolation = tl.Unicode().tag(attr=True) - - node = MyDataSource2(source="test") - with pytest.raises(NodeException, match="The 'interpolation' property cannot be tagged as an 'attr'"): - node.base_definition + DataSource(nan_vals=10) def test_repr(self): - node = DataSource(source="test", native_coordinates=Coordinates([0, 1], dims=["lat", "lon"])) - repr(node) - - node = DataSource(source="test", native_coordinates=Coordinates([[0, 1]], dims=["lat_lon"])) - repr(node) - - class MyDataSource(DataSource): - pass - - node = MyDataSource(source="test") + node = DataSource() repr(node) def test_interpolation_class(self): - node = DataSource(source="test", interpolation="max") + node = DataSource(interpolation="max") assert node.interpolation_class assert isinstance(node.interpolation_class, Interpolation) assert node.interpolation_class.definition == "max" @@ -196,7 +235,7 @@ def test_interpolation_class(self): def test_interpolators(self): node = MockDataSource() - node.eval(node.native_coordinates) + node.eval(node.coordinates) assert isinstance(node.interpolators, OrderedDict) @@ -209,17 +248,17 @@ def test_interpolators(self): assert "lon" in list(node.interpolators.keys())[0] assert isinstance(list(node.interpolators.values())[0], Interpolator) - def test_evaluate_at_native_coordinates(self): - """evaluate node at native coordinates""" + def test_evaluate_at_coordinates(self): + """evaluate node at coordinates""" node = MockDataSource() - output = node.eval(node.native_coordinates) + output = node.eval(node.coordinates) assert isinstance(output, UnitsDataArray) - assert output.shape == (101, 101) + assert output.shape == (11, 11) assert output[0, 0] == 10 - assert output.lat.shape == (101,) - assert output.lon.shape == (101,) + assert output.lat.shape == (11,) + assert output.lon.shape == (11,) # assert coordinates assert isinstance(output.coords, DataArrayCoordinates) @@ -261,8 +300,7 @@ def test_evaluate_with_output_different_crs(self): # this will not throw an error because the requested coordinates will be transformed before request output = node.create_output_array(c) - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - node.eval(c_x, output=output) + node.eval(c_x, output=output) # this will throw an error because output is not in the same crs as node output = node.create_output_array(c_x) @@ -300,13 +338,11 @@ def test_evaluate_with_output_transpose(self): def test_evaluate_with_crs_transform(self): # grid coords - grid_coords = Coordinates([np.linspace(-10, 10, 21), np.linspace(-10, -10, 21)], dims=["lat", "lon"]) - with 
pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - grid_coords = grid_coords.transform("EPSG:2193") + grid_coords = Coordinates([np.linspace(-10, 10, 21), np.linspace(-10, 10, 21)], dims=["lat", "lon"]) + grid_coords = grid_coords.transform("EPSG:2193") node = MockDataSource() - with pytest.warns(UserWarning, match="transformation of coordinate segment lengths not yet implemented"): - out = node.eval(grid_coords) + out = node.eval(grid_coords) assert round(out.coords["lat"].values[0, 0]) == -8889021.0 assert round(out.coords["lon"].values[0, 0]) == 1928929.0 @@ -325,39 +361,37 @@ def test_evaluate_with_crs_transform(self): assert round(out.coords["lon"].values[0]) == 1928929.0 def test_evaluate_extra_dims(self): - # drop extra dimension - node = MockArrayDataSource( - source=np.empty((3, 2)), - native_coordinates=Coordinates([[0, 1, 2], [10, 11]], dims=["lat", "lon"]), - interpolation="nearest_preview", - ) + # drop extra unstacked dimension + class MyDataSource(DataSource): + coordinates = Coordinates([1, 11], dims=["lat", "lon"]) - output = node.eval(Coordinates([1, 11, "2018-01-01"], dims=["lat", "lon", "time"])) + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates) + + node = MyDataSource() + coords = Coordinates([1, 11, "2018-01-01"], dims=["lat", "lon", "time"]) + output = node.eval(coords) assert output.dims == ("lat", "lon") # time dropped # drop extra stacked dimension if none of its dimensions are needed - node = MockArrayDataSource( - source=np.empty((2)), - native_coordinates=Coordinates([["2018-01-01", "2018-01-02"]], dims=["time"]), - interpolation="nearest_preview", - ) + class MyDataSource(DataSource): + coordinates = Coordinates(["2018-01-01"], dims=["time"]) - output = node.eval(Coordinates([[1, 11], "2018-01-01"], dims=["lat_lon", "time"])) + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates) + + node = MyDataSource() + coords = Coordinates([[1, 11], "2018-01-01"], dims=["lat_lon", "time"]) + output = node.eval(coords) assert output.dims == ("time",) # lat_lon dropped - # don't drop extra stacked dimension if any of its dimensions are needed - # TODO interpolation is not yet implemented - # node = MockArrayDataSource( - # source=np.empty(3), - # native_coordinates=Coordinates([[0, 1, 2]], dims=['lat'])) - # output = node.eval(Coordinates([[1, 11]], dims=['lat_lon'])) - # assert output.dims == ('lat_lon') # lon portion not dropped + # but don't drop extra stacked dimension if any of its dimensions are needed + # output = node.eval(Coordinates([[1, 11, '2018-01-01']], dims=['lat_lon_time'])) + # assert output.dims == ('lat_lon_time') # lat and lon not dropped def test_evaluate_missing_dims(self): # missing unstacked dimension - node = MockArrayDataSource( - source=np.empty((3, 2)), native_coordinates=Coordinates([[0, 1, 2], [10, 11]], dims=["lat", "lon"]) - ) + node = MockDataSource() with pytest.raises(ValueError, match="Cannot evaluate these coordinates.*"): node.eval(Coordinates([1], dims=["lat"])) @@ -367,9 +401,7 @@ def test_evaluate_missing_dims(self): node.eval(Coordinates(["2018-01-01"], dims=["time"])) # missing any part of stacked dimension - node = MockArrayDataSource( - source=np.empty(3), native_coordinates=Coordinates([[[0, 1, 2], [10, 11, 12]]], dims=["lat_lon"]) - ) + node = MockDataSourceStacked() with pytest.raises(ValueError, match="Cannot evaluate these coordinates.*"): node.eval(Coordinates([1], 
dims=["time"])) @@ -387,27 +419,25 @@ def test_evaluate_no_overlap(self): assert np.all(np.isnan(output)) def test_evaluate_extract_output(self): - coords = Coordinates([[0, 1, 2, 3], [10, 11]], dims=["lat", "lon"]) - - class MockMultipleDataSource(DataSource): + class MyMultipleDataSource(DataSource): outputs = ["a", "b", "c"] - native_coordinates = coords + coordinates = Coordinates([[0, 1, 2, 3], [10, 11]], dims=["lat", "lon"]) def get_data(self, coordinates, coordinates_index): return self.create_output_array(coordinates, data=1) # don't extract when no output field is requested - node = MockMultipleDataSource() - o = node.eval(coords) + node = MyMultipleDataSource() + o = node.eval(node.coordinates) assert o.shape == (4, 2, 3) np.testing.assert_array_equal(o.dims, ["lat", "lon", "output"]) np.testing.assert_array_equal(o["output"], ["a", "b", "c"]) np.testing.assert_array_equal(o, 1) # do extract when an output field is requested - node = MockMultipleDataSource(output="b") + node = MyMultipleDataSource(output="b") - o = node.eval(coords) # get_data case + o = node.eval(node.coordinates) # get_data case assert o.shape == (4, 2) np.testing.assert_array_equal(o.dims, ["lat", "lon"]) np.testing.assert_array_equal(o, 1) @@ -418,16 +448,13 @@ def get_data(self, coordinates, coordinates_index): np.testing.assert_array_equal(o, np.nan) # should still work if the node has already extracted it - class MockMultipleDataSource2(DataSource): - outputs = ["a", "b", "c"] - native_coordinates = coords - + class MyMultipleDataSource2(MyMultipleDataSource): def get_data(self, coordinates, coordinates_index): out = self.create_output_array(coordinates, data=1) return out.sel(output=self.output) - node = MockMultipleDataSource2(output="b") - o = node.eval(coords) + node = MyMultipleDataSource2(output="b") + o = node.eval(node.coordinates) assert o.shape == (4, 2) np.testing.assert_array_equal(o.dims, ["lat", "lon"]) np.testing.assert_array_equal(o, 1) @@ -435,10 +462,26 @@ def get_data(self, coordinates, coordinates_index): def test_nan_vals(self): """ evaluate note with nan_vals """ - node = MockDataSource(nan_vals=[10, None]) - output = node.eval(node.native_coordinates) - - assert output.values[np.isnan(output)].shape == (2,) + # none + node = MockDataSource() + output = node.eval(node.coordinates) + assert np.sum(np.isnan(output)) == 1 + assert np.isnan(output[1, 1]) + + # one value + node = MockDataSource(nan_vals=[10]) + output = node.eval(node.coordinates) + assert np.sum(np.isnan(output)) == 2 + assert np.isnan(output[0, 0]) + assert np.isnan(output[1, 1]) + + # multiple values + node = MockDataSource(nan_vals=[10, 5]) + output = node.eval(node.coordinates) + assert np.sum(np.isnan(output)) == 3 + assert np.isnan(output[0, 0]) + assert np.isnan(output[1, 1]) + assert np.isnan(output[1, 0]) def test_get_data_np_array(self): class MockDataSourceReturnsArray(MockDataSource): @@ -446,10 +489,10 @@ def get_data(self, coordinates, coordinates_index): return self.data[coordinates_index] node = MockDataSourceReturnsArray() - output = node.eval(node.native_coordinates) + output = node.eval(node.coordinates) assert isinstance(output, UnitsDataArray) - assert node.native_coordinates["lat"].coordinates[4] == output.coords["lat"].values[4] + assert node.coordinates["lat"].coordinates[4] == output.coords["lat"].values[4] def test_get_data_DataArray(self): class MockDataSourceReturnsDataArray(MockDataSource): @@ -457,17 +500,81 @@ def get_data(self, coordinates, coordinates_index): return 
xr.DataArray(self.data[coordinates_index]) node = MockDataSourceReturnsDataArray() - output = node.eval(node.native_coordinates) + output = node.eval(node.coordinates) assert isinstance(output, UnitsDataArray) - assert node.native_coordinates["lat"].coordinates[4] == output.coords["lat"].values[4] + assert node.coordinates["lat"].coordinates[4] == output.coords["lat"].values[4] def test_find_coordinates(self): node = MockDataSource() l = node.find_coordinates() assert isinstance(l, list) assert len(l) == 1 - assert l[0] == node.native_coordinates + assert l[0] == node.coordinates + + def test_get_boundary(self): + # disable boundary validation (until non-centered and non-uniform boundaries are fully implemented) + class MockDataSourceNoBoundaryValidation(MockDataSource): + @tl.validate("boundary") + def _validate_boundary(self, d): + return d["value"] + + index = (slice(3, 9, 2), [3, 4, 6]) + + # points + node = MockDataSourceNoBoundaryValidation(boundary={}) + boundary = node._get_boundary(index) + assert boundary == {} + + # uniform centered + node = MockDataSourceNoBoundaryValidation(boundary={"lat": 0.1, "lon": 0.2}) + boundary = node._get_boundary(index) + assert boundary == {"lat": 0.1, "lon": 0.2} + + # uniform polygon + node = MockDataSourceNoBoundaryValidation(boundary={"lat": [-0.1, 0.1], "lon": [-0.1, 0.0, 0.1]}) + boundary = node._get_boundary(index) + assert boundary == {"lat": [-0.1, 0.1], "lon": [-0.1, 0.0, 0.1]} + + # non-uniform + lat_boundary = np.vstack([-np.arange(11), np.arange(11)]).T + lon_boundary = np.vstack([-2 * np.arange(11), 2 * np.arange(11)]).T + node = MockDataSourceNoBoundaryValidation(boundary={"lat": lat_boundary, "lon": lon_boundary}) + boundary = node._get_boundary(index) + np.testing.assert_array_equal(boundary["lat"], lat_boundary[index[0]]) + np.testing.assert_array_equal(boundary["lon"], lon_boundary[index[1]]) + + def test_get_boundary_stacked(self): + # disable boundary validation (until non-centered and non-uniform boundaries are fully implemented) + class MockDataSourceStackedNoBoundaryValidation(MockDataSourceStacked): + @tl.validate("boundary") + def _validate_boundary(self, d): + return d["value"] + + index = (slice(3, 9, 2),) + + # points + node = MockDataSourceStackedNoBoundaryValidation(boundary={}) + boundary = node._get_boundary(index) + assert boundary == {} + + # uniform centered + node = MockDataSourceStackedNoBoundaryValidation(boundary={"lat": 0.1, "lon": 0.1}) + boundary = node._get_boundary(index) + assert boundary == {"lat": 0.1, "lon": 0.1} + + # uniform polygon + node = MockDataSourceStackedNoBoundaryValidation(boundary={"lat": [-0.1, 0.1], "lon": [-0.1, 0.0, 0.1]}) + boundary = node._get_boundary(index) + assert boundary == {"lat": [-0.1, 0.1], "lon": [-0.1, 0.0, 0.1]} + + # non-uniform + lat_boundary = np.vstack([-np.arange(11), np.arange(11)]).T + lon_boundary = np.vstack([-2 * np.arange(11), 2 * np.arange(11)]).T + node = MockDataSourceStackedNoBoundaryValidation(boundary={"lat": lat_boundary, "lon": lon_boundary}) + boundary = node._get_boundary(index) + np.testing.assert_array_equal(boundary["lat"], lat_boundary[index]) + np.testing.assert_array_equal(boundary["lon"], lon_boundary[index]) class TestInterpolateData(object): @@ -481,15 +588,18 @@ def test_one_data_point(self): def test_interpolate_time(self): """ for now time uses nearest neighbor """ - source = np.random.rand(5) - coords_src = Coordinates([clinspace(0, 10, 5)], dims=["time"]) - coords_dst = Coordinates([clinspace(1, 11, 5)], dims=["time"]) + class 
MyDataSource(DataSource): + coordinates = Coordinates([clinspace(0, 10, 5)], dims=["time"]) + + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates) - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - output = node.eval(coords_dst) + node = MyDataSource() + coords = Coordinates([clinspace(1, 11, 5)], dims=["time"]) + output = node.eval(coords) assert isinstance(output, UnitsDataArray) - assert np.all(output.time.values == coords_dst.coords["time"]) + assert np.all(output.time.values == coords.coords["time"]) def test_interpolate_lat_time(self): """interpolate with n dims and time""" @@ -498,12 +608,16 @@ def test_interpolate_lat_time(self): def test_interpolate_alt(self): """ for now alt uses nearest neighbor """ - source = np.random.rand(5) - coords_src = Coordinates([clinspace(0, 10, 5)], dims=["alt"], crs="+proj=merc +vunits=m") - coords_dst = Coordinates([clinspace(1, 11, 5)], dims=["alt"], crs="+proj=merc +vunits=m") + class MyDataSource(DataSource): + coordinates = Coordinates([clinspace(0, 10, 5)], dims=["alt"], crs="+proj=merc +vunits=m") + + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates) + + coords = Coordinates([clinspace(1, 11, 5)], dims=["alt"], crs="+proj=merc +vunits=m") - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - output = node.eval(coords_dst) + node = MyDataSource() + output = node.eval(coords) assert isinstance(output, UnitsDataArray) - assert np.all(output.alt.values == coords_dst.coords["alt"]) + assert np.all(output.alt.values == coords.coords["alt"]) diff --git a/podpac/core/data/test/test_file_source.py b/podpac/core/data/test/test_file_source.py new file mode 100644 index 000000000..e8ea3acab --- /dev/null +++ b/podpac/core/data/test/test_file_source.py @@ -0,0 +1,260 @@ +import os + +import numpy as np +import traitlets as tl +import pytest + +import podpac +from podpac.core.data.file_source import BaseFileSource +from podpac.core.data.file_source import LoadFileMixin +from podpac.core.data.file_source import FileKeysMixin + +LAT = [0, 1, 2] +LON = [10, 20] +TIME = [100, 200] +ALT = [1, 2, 3, 4] +DATA = np.arange(48).reshape((3, 2, 2, 4)) +OTHER = 2 * np.arange(48).reshape((3, 2, 2, 4)) + + +class TestBaseFileSource(object): + def test_source_required(self): + node = BaseFileSource() + with pytest.raises(ValueError, match="'source' required"): + node.source + + def test_dataset_not_implemented(self): + node = BaseFileSource(source="mysource") + with pytest.raises(NotImplementedError): + node.dataset + + def test_close(self): + node = BaseFileSource(source="mysource") + node.close_dataset() + + def test_repr_str(self): + node = BaseFileSource(source="mysource") + assert "source=" in repr(node) + assert "source=" in str(node) + + +# --------------------------------------------------------------------------------------------------------------------- +# LoadFileMixin +# --------------------------------------------------------------------------------------------------------------------- + + +class MockLoadFile(LoadFileMixin, BaseFileSource): + def open_dataset(self, f): + return None + + +class TestLoadFile(object): + def test_open_dataset_not_implemented(self): + node = LoadFileMixin() + with pytest.raises(NotImplementedError): + node.open_dataset(None) + + def test_local(self): + path = os.path.join(os.path.dirname(__file__), "assets/points-single.csv") + node = MockLoadFile(source=path) + node.dataset + + 
@pytest.mark.aws + def test_s3(self): + # TODO replace this with a better public s3 fileobj for testing + path = "s3://modis-pds/MCD43A4.006/00/08/2020018/MCD43A4.A2020018.h00v08.006.2020027031229_meta.json" + node = MockLoadFile(source=path) + node.dataset + + @pytest.mark.aws # TODO + def test_ftp(self): + node = MockLoadFile(source="ftp://speedtest.tele2.net/1KB.zip") + node.dataset + + @pytest.mark.aws # TODO + def test_http(self): + node = MockLoadFile(source="https://httpstat.us/200") + node.dataset + + def test_file(self): + path = os.path.join(os.path.dirname(__file__), "assets/points-single.csv") + node = MockLoadFile(source="file:///%s" % path) + node.dataset + + def test_cache_dataset(self): + path = os.path.join(os.path.dirname(__file__), "assets/points-single.csv") + + with podpac.settings: + podpac.settings["DEFAULT_CACHE"] = ["ram"] + node = MockLoadFile(source="file:///%s" % path, cache_dataset=True) + node.dataset + + # node caches dataset object + assert node._dataset_caching_node.has_cache("dataset") + + # another node can get cached object + node2 = MockLoadFile(source="file:///%s" % path) + assert node2._dataset_caching_node.has_cache("dataset") + node2.dataset + + +# --------------------------------------------------------------------------------------------------------------------- +# FileKeysMixin +# --------------------------------------------------------------------------------------------------------------------- + + +class MockFileKeys(FileKeysMixin, BaseFileSource): + source = "mock-single" + dataset = {"lat": LAT, "lon": LON, "time": TIME, "alt": ALT, "data": DATA} + keys = ["lat", "lon", "time", "alt", "data"] + dims = ["lat", "lon", "time", "alt"] + + +class MockFileKeysMultipleAvailable(FileKeysMixin, BaseFileSource): + source = "mock-multiple" + dataset = {"lat": LAT, "lon": LON, "time": TIME, "alt": ALT, "data": DATA, "other": OTHER} + keys = ["lat", "lon", "time", "alt", "data", "other"] + dims = ["lat", "lon", "time", "alt"] + + +class MockFileKeysEmpty(FileKeysMixin, BaseFileSource): + source = "mock-empty" + dataset = {"lat": LAT, "lon": LON, "time": TIME, "alt": ALT} + keys = ["lat", "lon", "time", "alt"] + dims = ["lat", "lon", "time", "alt"] + + +class TestFileKeys(object): + def test_not_implemented(self): + class MySource(FileKeysMixin, BaseFileSource): + pass + + node = MySource(source="mysource") + + with pytest.raises(NotImplementedError): + node.keys + + with pytest.raises(NotImplementedError): + node.dims + + def test_available_data_keys(self): + node = MockFileKeys() + assert node.available_data_keys == ["data"] + + node = MockFileKeysMultipleAvailable() + assert node.available_data_keys == ["data", "other"] + + node = MockFileKeysEmpty() + with pytest.raises(ValueError, match="No data keys found"): + node.available_data_keys + + def test_data_key(self): + node = MockFileKeys() + assert node.data_key == "data" + + node = MockFileKeys(data_key="data") + assert node.data_key == "data" + + with pytest.raises(ValueError, match="Invalid data_key"): + node = MockFileKeys(data_key="misc") + + def test_data_key_multiple_outputs(self): + node = MockFileKeysMultipleAvailable() + assert node.data_key == ["data", "other"] + + node = MockFileKeysMultipleAvailable(data_key=["other", "data"]) + assert node.data_key == ["other", "data"] + + node = MockFileKeysMultipleAvailable(data_key="other") + assert node.data_key == "other" + + with pytest.raises(ValueError, match="Invalid data_key"): + node = MockFileKeysMultipleAvailable(data_key=["data", "misc"]) + 
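# A rough sketch of the data_key/outputs rules asserted in the MockFileKeys* tests:
# a string data_key keeps the node single-output (outputs is None), while a list
# data_key makes the node multi-output, with outputs defaulting to the selected keys
# unless an equal-length outputs list is given. Illustrative only; the dataset/keys/dims
# values below are assumptions, and the rules are inferred from the assertions above.
import numpy as np
from podpac.core.data.file_source import BaseFileSource, FileKeysMixin

class ExampleFileKeys(FileKeysMixin, BaseFileSource):
    source = "example"
    dataset = {"lat": [0, 1, 2], "lon": [10, 20], "data": np.zeros((3, 2)), "other": np.ones((3, 2))}
    keys = ["lat", "lon", "data", "other"]
    dims = ["lat", "lon"]

# with several non-dimension keys available, the node defaults to multi-output
node = ExampleFileKeys()
assert node.available_data_keys == ["data", "other"]
assert node.data_key == ["data", "other"]
assert node.outputs == ["data", "other"]

# a string data_key selects one variable and keeps the node single-output
single = ExampleFileKeys(data_key="data")
assert single.outputs is None

# alternate output names may be supplied, but must match data_key in length
renamed = ExampleFileKeys(data_key=["data", "other"], outputs=["a", "b"])
assert renamed.outputs == ["a", "b"]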
+ with pytest.raises(ValueError, match="Invalid data_key"): + node = MockFileKeysMultipleAvailable(data_key="misc") + + def test_no_outputs(self): + node = MockFileKeys(data_key="data") + assert node.outputs == None + + node = MockFileKeysMultipleAvailable(data_key="data") + assert node.outputs == None + + with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): + node = MockFileKeys(data_key="data", outputs=["a"]) + + with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): + node = MockFileKeysMultipleAvailable(data_key="data", outputs=["a"]) + + with pytest.raises(TypeError, match="outputs must be None for single-output nodes"): + node = MockFileKeys(outputs=["a"]) + + def test_outputs(self): + # for multi-output nodes, use the dataset's keys by default + node = MockFileKeys(data_key=["data"]) + assert node.outputs == ["data"] + + node = MockFileKeysMultipleAvailable(data_key=["data", "other"]) + assert node.outputs == ["data", "other"] + + node = MockFileKeysMultipleAvailable(data_key=["data"]) + assert node.outputs == ["data"] + + # alternate outputs names can be specified + node = MockFileKeys(data_key=["data"], outputs=["a"]) + assert node.outputs == ["a"] + + node = MockFileKeysMultipleAvailable(data_key=["data", "other"], outputs=["a", "b"]) + assert node.outputs == ["a", "b"] + + node = MockFileKeysMultipleAvailable(data_key=["data"], outputs=["a"]) + assert node.outputs == ["a"] + + node = MockFileKeysMultipleAvailable(outputs=["a", "b"]) + assert node.outputs == ["a", "b"] + + # but the outputs and data_key must match + with pytest.raises(TypeError, match="outputs and data_key mismatch"): + node = MockFileKeysMultipleAvailable(data_key=["data"], outputs=None) + + with pytest.raises(ValueError, match="outputs and data_key size mismatch"): + node = MockFileKeysMultipleAvailable(data_key=["data"], outputs=["a", "b"]) + + with pytest.raises(ValueError, match="outputs and data_key size mismatch"): + node = MockFileKeysMultipleAvailable(data_key=["data", "other"], outputs=["a"]) + + def test_coordinates(self): + node = MockFileKeys() + nc = node.coordinates + assert nc.dims == ("lat", "lon", "time", "alt") + np.testing.assert_array_equal(nc["lat"].coordinates, LAT) + np.testing.assert_array_equal(nc["lon"].coordinates, LON) + np.testing.assert_array_equal(nc["time"].coordinates, TIME) + np.testing.assert_array_equal(nc["alt"].coordinates, ALT) + + def test_repr_str(self): + node = MockFileKeys() + + assert "source=" in repr(node) + assert "data_key=" not in repr(node) + + assert "source=" in str(node) + assert "data_key=" not in str(node) + + def test_repr_str_multiple_outputs(self): + node = MockFileKeysMultipleAvailable() + + assert "source=" in repr(node) + assert "data_key=" not in repr(node) + + assert "source=" in str(node) + assert "data_key=" not in str(node) + + node = MockFileKeysMultipleAvailable(data_key="data") + + assert "source=" in repr(node) + assert "data_key=" in repr(node) + + assert "source=" in str(node) + assert "data_key=" in str(node) diff --git a/podpac/core/data/test/test_h5py.py b/podpac/core/data/test/test_h5py.py index c34ed3253..ebb76f736 100644 --- a/podpac/core/data/test/test_h5py.py +++ b/podpac/core/data/test/test_h5py.py @@ -2,7 +2,7 @@ import numpy as np -from podpac.core.data.file import H5PY +from podpac.core.data.h5py_source import H5PY class TestH5PY(object): @@ -17,51 +17,51 @@ def test_dims(self): assert node.dims == ["lat", "lon"] node.close_dataset() - def test_available_keys(self): + 
def test_available_data_keys(self): node = H5PY(source=self.source, data_key="/data/init", lat_key="/coords/lat", lon_key="/coords/lon") - assert node.available_keys == ["/data/init"] + assert node.available_data_keys == ["/data/init"] node.close_dataset() - def test_native_coordinates(self): + def test_coordinates(self): node = H5PY(source=self.source, data_key="/data/init", lat_key="/coords/lat", lon_key="/coords/lon") - nc = node.native_coordinates - assert node.native_coordinates.shape == (3, 4) - np.testing.assert_array_equal(node.native_coordinates["lat"].coordinates, [45.1, 45.2, 45.3]) - np.testing.assert_array_equal(node.native_coordinates["lon"].coordinates, [-100.1, -100.2, -100.3, -100.4]) + nc = node.coordinates + assert node.coordinates.shape == (3, 4) + np.testing.assert_array_equal(node.coordinates["lat"].coordinates, [45.1, 45.2, 45.3]) + np.testing.assert_array_equal(node.coordinates["lon"].coordinates, [-100.1, -100.2, -100.3, -100.4]) node.close_dataset() def test_data(self): node = H5PY(source=self.source, data_key="/data/init", lat_key="/coords/lat", lon_key="/coords/lon") - o = node.eval(node.native_coordinates) + o = node.eval(node.coordinates) np.testing.assert_array_equal(o.data.ravel(), np.arange(12)) node.close_dataset() # default node = H5PY(source=self.source, lat_key="/coords/lat", lon_key="/coords/lon") - o = node.eval(node.native_coordinates) + o = node.eval(node.coordinates) np.testing.assert_array_equal(o.data.ravel(), np.arange(12)) node.close_dataset() def test_data_multiple(self): node = H5PY( source=self.source, - output_keys=["/data/init", "/data/init"], + data_key=["/data/init", "/data/init"], outputs=["a", "b"], lat_key="/coords/lat", lon_key="/coords/lon", ) - o = node.eval(node.native_coordinates) + o = node.eval(node.coordinates) assert o.dims == ("lat", "lon", "output") np.testing.assert_array_equal(o["output"], ["a", "b"]) np.testing.assert_array_equal(o.sel(output="a").data.ravel(), np.arange(12)) np.testing.assert_array_equal(o.sel(output="b").data.ravel(), np.arange(12)) node.close_dataset() - def test_attrs(self): + def test_dataset_attrs(self): node = H5PY(source=self.source, data_key="/data/init", lat_key="/coords/lat", lon_key="/coords/lon") - assert node.attrs() == {} - assert node.attrs("data") == {"test": "test"} - assert node.attrs("coords/lat") == {"unit": "degrees"} - assert node.attrs("coords/lon") == {"unit": "degrees"} - assert node.attrs("coords") == {"crs": "EPSG:4326s"} + assert node.dataset_attrs() == {} + assert node.dataset_attrs("data") == {"test": "test"} + assert node.dataset_attrs("coords/lat") == {"unit": "degrees"} + assert node.dataset_attrs("coords/lon") == {"unit": "degrees"} + assert node.dataset_attrs("coords") == {"crs": "EPSG:4326s"} node.close_dataset() diff --git a/podpac/core/data/test/test_integration.py b/podpac/core/data/test/test_integration.py index 089746b99..f5337812d 100644 --- a/podpac/core/data/test/test_integration.py +++ b/podpac/core/data/test/test_integration.py @@ -29,7 +29,7 @@ def test_array(self): lat = np.random.rand(16) lon = np.random.rand(16) coord = Coordinate(lat_lon=(lat, lon), time=(0, 10, 11), order=["lat_lon", "time"]) - node = Array(source=arr, native_coordinates=coord) + node = Array(source=arr, coordinates=coord) coordg = Coordinate(lat=(0, 1, 8), lon=(0, 1, 8), order=("lat", "lon")) coordt = Coordinate(time=(3, 5, 2)) @@ -85,11 +85,11 @@ def setup_method(self, method): self.lonSource = LON self.timeSource = TIME - self.nasLat = Array(source=LAT.astype(float), 
native_coordinates=self.coord_src, interpolation="bilinear") + self.nasLat = Array(source=LAT.astype(float), coordinates=self.coord_src, interpolation="bilinear") - self.nasLon = Array(source=LON.astype(float), native_coordinates=self.coord_src, interpolation="bilinear") + self.nasLon = Array(source=LON.astype(float), coordinates=self.coord_src, interpolation="bilinear") - self.nasTime = Array(source=TIME.astype(float), native_coordinates=self.coord_src, interpolation="bilinear") + self.nasTime = Array(source=TIME.astype(float), coordinates=self.coord_src, interpolation="bilinear") def test_raster_to_raster(self): coord_dst = Coordinates([clinspace(5.0, 40.0, 50), clinspace(-68.0, -66.0, 100)], dims=["lat", "lon"]) diff --git a/podpac/core/data/test/test_interpolate.py b/podpac/core/data/test/test_interpolate.py deleted file mode 100644 index ad0ccaa69..000000000 --- a/podpac/core/data/test/test_interpolate.py +++ /dev/null @@ -1,743 +0,0 @@ -""" -Test interpolation methods - - -""" -# pylint: disable=C0111,W0212,R0903 - -from collections import OrderedDict -from copy import deepcopy - -import pytest -import traitlets as tl -import numpy as np - -from podpac.core.units import UnitsDataArray -from podpac.core.coordinates import Coordinates, clinspace -from podpac.core.data.file import rasterio -from podpac.core.data import datasource -from podpac.core.data.interpolation import ( - Interpolation, - InterpolationException, - INTERPOLATORS, - INTERPOLATION_METHODS, - INTERPOLATION_DEFAULT, - INTERPOLATORS_DICT, - INTERPOLATION_METHODS_DICT, -) - -from podpac.core.data.interpolator import Interpolator, InterpolatorException -from podpac.core.data.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyGrid, ScipyPoint - -# test fixtures -from podpac.core.data.test.test_datasource import MockArrayDataSource - - -class TestInterpolation(object): - """ Test interpolation class and support methods""" - - def test_allow_missing_modules(self): - """TODO: Allow user to be missing rasterio and scipy""" - pass - - def test_interpolation_methods(self): - assert len(set(INTERPOLATION_METHODS) & set(INTERPOLATION_METHODS_DICT.keys())) == len(INTERPOLATION_METHODS) - - def test_interpolator_init_type(self): - """test constructor - """ - - # should throw an error if definition is not str, dict, or Interpolator - with pytest.raises(TypeError): - Interpolation(5) - - def test_str_definition(self): - # should throw an error if string input is not one of the INTERPOLATION_METHODS - with pytest.raises(InterpolationException): - Interpolation("test") - - interp = Interpolation("nearest") - assert interp.config[("default",)] - assert isinstance(interp.config[("default",)], dict) - assert interp.config[("default",)]["method"] == "nearest" - assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) - - def test_dict_definition(self): - - # should handle a default definition without any dimensions - interp = Interpolation({"method": "nearest", "params": {"spatial_tolerance": 1}}) - assert isinstance(interp.config[("default",)], dict) - assert interp.config[("default",)]["method"] == "nearest" - assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) - assert interp.config[("default",)]["params"] == {"spatial_tolerance": 1} - - # handle string methods - interp = Interpolation({"method": "nearest", "dims": ["lat", "lon"]}) - print(interp.config) - assert isinstance(interp.config[("lat", "lon")], dict) - assert interp.config[("lat", "lon")]["method"] == 
"nearest" - assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) - assert interp.config[("default",)]["params"] == {} - - # handle dict methods - - # should throw an error if method is not in dict - with pytest.raises(InterpolationException): - Interpolation([{"test": "test", "dims": ["lat", "lon"]}]) - - # should throw an error if method is not a string - with pytest.raises(InterpolationException): - Interpolation([{"method": 5, "dims": ["lat", "lon"]}]) - - # should throw an error if method is not one of the INTERPOLATION_METHODS and no interpolators defined - with pytest.raises(InterpolationException): - Interpolation([{"method": "myinter", "dims": ["lat", "lon"]}]) - - # should throw an error if params is not a dict - with pytest.raises(TypeError): - Interpolation([{"method": "nearest", "dims": ["lat", "lon"], "params": "test"}]) - - # should throw an error if interpolators is not a list - with pytest.raises(TypeError): - Interpolation([{"method": "nearest", "interpolators": "test", "dims": ["lat", "lon"]}]) - - # should throw an error if interpolators are not Interpolator classes - with pytest.raises(TypeError): - Interpolation([{"method": "nearest", "interpolators": [NearestNeighbor, "test"], "dims": ["lat", "lon"]}]) - - # should throw an error if dimension is defined twice - with pytest.raises(InterpolationException): - Interpolation([{"method": "nearest", "dims": ["lat", "lon"]}, {"method": "bilinear", "dims": ["lat"]}]) - - # should throw an error if dimension is not a list - with pytest.raises(TypeError): - Interpolation([{"method": "nearest", "dims": "lat"}]) - - # should handle standard INTEPROLATION_SHORTCUTS - interp = Interpolation([{"method": "nearest", "dims": ["lat", "lon"]}]) - assert isinstance(interp.config[("lat", "lon")], dict) - assert interp.config[("lat", "lon")]["method"] == "nearest" - assert isinstance(interp.config[("lat", "lon")]["interpolators"][0], Interpolator) - assert interp.config[("lat", "lon")]["params"] == {} - - # should not allow custom methods if interpolators can't support - with pytest.raises(InterpolatorException): - interp = Interpolation( - [{"method": "myinter", "interpolators": [NearestNeighbor, NearestPreview], "dims": ["lat", "lon"]}] - ) - - # should allow custom methods if interpolators can support - class MyInterp(Interpolator): - methods_supported = ["myinter"] - - interp = Interpolation([{"method": "myinter", "interpolators": [MyInterp], "dims": ["lat", "lon"]}]) - assert interp.config[("lat", "lon")]["method"] == "myinter" - assert isinstance(interp.config[("lat", "lon")]["interpolators"][0], MyInterp) - - # should allow params to be set - interp = Interpolation( - [ - { - "method": "myinter", - "interpolators": [MyInterp], - "params": {"spatial_tolerance": 5}, - "dims": ["lat", "lon"], - } - ] - ) - - assert interp.config[("lat", "lon")]["params"] == {"spatial_tolerance": 5} - - # set default equal to empty tuple - interp = Interpolation([{"method": "bilinear", "dims": ["lat"]}]) - assert interp.config[("default",)]["method"] == INTERPOLATION_DEFAULT - - # use default with override if not all dimensions are supplied - interp = Interpolation([{"method": "bilinear", "dims": ["lat"]}, "nearest"]) - assert interp.config[("default",)]["method"] == "nearest" - - # make sure default is always the last key in the ordered config dict - interp = Interpolation(["nearest", {"method": "bilinear", "dims": ["lat"]}]) - assert list(interp.config.keys())[-1] == ("default",) - - # should sort the dims keys - interp = 
Interpolation(["nearest", {"method": "bilinear", "dims": ["lon", "lat"]}]) - assert interp.config[("lat", "lon")]["method"] == "bilinear" - - def test_init_interpolators(self): - - # should set method - interp = Interpolation("nearest") - assert interp.config[("default",)]["interpolators"][0].method == "nearest" - - # Interpolation init should init all interpolators in the list - interp = Interpolation([{"method": "nearest", "params": {"spatial_tolerance": 1}}]) - assert interp.config[("default",)]["interpolators"][0].spatial_tolerance == 1 - - # should throw TraitErrors defined by Interpolator - with pytest.raises(tl.TraitError): - Interpolation([{"method": "nearest", "params": {"spatial_tolerance": "tol"}}]) - - # should not allow undefined params - with pytest.warns(DeprecationWarning): # eventually, Traitlets will raise an exception here - interp = Interpolation([{"method": "nearest", "params": {"myarg": 1}}]) - with pytest.raises(AttributeError): - assert interp.config[("default",)]["interpolators"][0].myarg == "tol" - - def test_select_interpolator_queue(self): - - reqcoords = Coordinates([[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"]) - srccoords = Coordinates([[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"]) - - # create a few dummy interpolators that handle certain dimensions - # (can_select is defined by default to look at dims_supported) - class TimeLat(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["time", "lat"] - - def can_select(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - def can_interpolate(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - class LatLon(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["lat", "lon"] - - def can_select(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - def can_interpolate(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - class Lon(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["lon"] - - def can_select(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - def can_interpolate(self, udims, source_coordinates, eval_coordinates): - return self._filter_udims_supported(udims) - - # set up a strange interpolation definition - # we want to interpolate (lat, lon) first, then after (time, alt) - interp = Interpolation( - [ - {"method": "myinterp", "interpolators": [LatLon, TimeLat], "dims": ["lat", "lon"]}, - {"method": "myinterp", "interpolators": [TimeLat, Lon], "dims": ["time", "alt"]}, - ] - ) - - # default = 'nearest', which will return NearestPreview for can_select - interpolator_queue = interp._select_interpolator_queue(srccoords, reqcoords, "can_select") - assert isinstance(interpolator_queue, OrderedDict) - assert isinstance(interpolator_queue[("lat", "lon")], LatLon) - assert ("time", "alt") not in interpolator_queue and ("alt", "time") not in interpolator_queue - - # should throw an error if strict is set and not all dimensions can be handled - with pytest.raises(InterpolationException): - interp_copy = deepcopy(interp) - del interp_copy.config[("default",)] - interpolator_queue = interp_copy._select_interpolator_queue(srccoords, reqcoords, "can_select", strict=True) - - # default = Nearest, which can handle all dims for can_interpolate - interpolator_queue = 
interp._select_interpolator_queue(srccoords, reqcoords, "can_interpolate") - assert isinstance(interpolator_queue, OrderedDict) - assert isinstance(interpolator_queue[("lat", "lon")], LatLon) - - if ("alt", "time") in interpolator_queue: - assert isinstance(interpolator_queue[("alt", "time")], NearestNeighbor) - else: - assert isinstance(interpolator_queue[("time", "alt")], NearestNeighbor) - - def test_select_coordinates(self): - - reqcoords = Coordinates( - [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"], crs="+proj=merc +vunits=m" - ) - srccoords = Coordinates( - [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"], crs="+proj=merc +vunits=m" - ) - - # create a few dummy interpolators that handle certain dimensions - # (can_select is defined by default to look at dims_supported) - class TimeLat(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["time", "lat"] - - def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): - return srccoords, srccoords_idx - - class LatLon(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["lat", "lon"] - - def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): - return srccoords, srccoords_idx - - class Lon(Interpolator): - methods_supported = ["myinterp"] - dims_supported = ["lon"] - - def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): - return srccoords, srccoords_idx - - # set up a strange interpolation definition - # we want to interpolate (lat, lon) first, then after (time, alt) - interp = Interpolation( - [ - {"method": "myinterp", "interpolators": [LatLon, TimeLat], "dims": ["lat", "lon"]}, - {"method": "myinterp", "interpolators": [TimeLat, Lon], "dims": ["time", "alt"]}, - ] - ) - - coords, cidx = interp.select_coordinates(srccoords, [], reqcoords) - - assert len(coords) == len(srccoords) - assert len(coords["lat"]) == len(srccoords["lat"]) - assert cidx == () - - def test_interpolate(self): - class TestInterp(Interpolator): - dims_supported = ["lat", "lon"] - - def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, output_data): - output_data = source_data - return output_data - - # test basic functionality - reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) - srccoords = Coordinates([[0, 2, 4], [0, 3, 4]], dims=["lat", "lon"]) - srcdata = UnitsDataArray( - np.random.rand(3, 3), coords=[srccoords[c].coordinates for c in srccoords], dims=srccoords.dims - ) - outdata = UnitsDataArray( - np.zeros(srcdata.shape), coords=[reqcoords[c].coordinates for c in reqcoords], dims=reqcoords.dims - ) - - interp = Interpolation({"method": "myinterp", "interpolators": [TestInterp], "dims": ["lat", "lon"]}) - outdata = interp.interpolate(srccoords, srcdata, reqcoords, outdata) - - assert np.all(outdata == srcdata) - - # test if data is size 1 - class TestFakeInterp(Interpolator): - dims_supported = ["lat"] - - def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, output_data): - return None - - reqcoords = Coordinates([[1]], dims=["lat"]) - srccoords = Coordinates([[1]], dims=["lat"]) - srcdata = UnitsDataArray( - np.random.rand(1), coords=[srccoords[c].coordinates for c in srccoords], dims=srccoords.dims - ) - outdata = UnitsDataArray( - np.zeros(srcdata.shape), coords=[reqcoords[c].coordinates for c in reqcoords], dims=reqcoords.dims - ) - - interp = Interpolation({"method": "myinterp", "interpolators": [TestFakeInterp], 
"dims": ["lat", "lon"]}) - outdata = interp.interpolate(srccoords, srcdata, reqcoords, outdata) - - assert np.all(outdata == srcdata) - - -class TestInterpolators(object): - class TestInterpolator(object): - """Test abstract interpolator class""" - - def test_can_select(self): - class CanAlwaysSelect(Interpolator): - def can_select(self, udims, reqcoords, srccoords): - return udims - - class CanNeverSelect(Interpolator): - def can_select(self, udims, reqcoords, srccoords): - return tuple() - - interp = CanAlwaysSelect(method="method") - can_select = interp.can_select(("time", "lat"), None, None) - assert "lat" in can_select and "time" in can_select - - interp = CanNeverSelect(method="method") - can_select = interp.can_select(("time", "lat"), None, None) - assert not can_select - - def test_dim_in(self): - interpolator = Interpolator(methods_supported=["test"], method="test") - - coords = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - assert interpolator._dim_in("lat", coords) - assert interpolator._dim_in("lat", coords, unstacked=True) - assert not interpolator._dim_in("time", coords) - - coords_two = Coordinates([clinspace(0, 10, 5)], dims=["lat"]) - assert interpolator._dim_in("lat", coords, coords_two) - assert not interpolator._dim_in("lon", coords, coords_two) - - coords_three = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) - assert not interpolator._dim_in("lat", coords, coords_two, coords_three) - assert interpolator._dim_in("lat", coords, coords_two, coords_three, unstacked=True) - - class TestNearest(object): - def test_nearest_preview_select(self): - - # test straight ahead functionality - reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) - srccoords = Coordinates([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dims=["lat", "lon"]) - - interp = Interpolation("nearest_preview") - - srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) - coords, cidx = interp.select_coordinates(srccoords, srccoords_index, reqcoords) - - assert len(coords) == len(srccoords) == len(cidx) - assert len(coords["lat"]) == len(reqcoords["lat"]) - assert len(coords["lon"]) == len(reqcoords["lon"]) - assert np.all(coords["lat"].coordinates == np.array([0, 2, 4])) - - # test when selection is applied serially - # this is equivalent to above - reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) - srccoords = Coordinates([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dims=["lat", "lon"]) - - interp = Interpolation( - [{"method": "nearest_preview", "dims": ["lat"]}, {"method": "nearest_preview", "dims": ["lon"]}] - ) - - srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) - coords, cidx = interp.select_coordinates(srccoords, srccoords_index, reqcoords) - - assert len(coords) == len(srccoords) == len(cidx) - assert len(coords["lat"]) == len(reqcoords["lat"]) - assert len(coords["lon"]) == len(reqcoords["lon"]) - assert np.all(coords["lat"].coordinates == np.array([0, 2, 4])) - - # test when coordinates are stacked and unstacked - # TODO: how to handle stacked/unstacked coordinate asynchrony? 
- # reqcoords = Coordinates([[-.5, 1.5, 3.5], [.5, 2.5, 4.5]], dims=['lat', 'lon']) - # srccoords = Coordinates([([0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5])], dims=['lat_lon']) - - # interp = Interpolation('nearest_preview') - - # srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) - # coords, cidx = interp.select_coordinates(reqcoords, srccoords, srccoords_index) - - # assert len(coords) == len(srcoords) == len(cidx) - # assert len(coords['lat']) == len(reqcoords['lat']) - # assert len(coords['lon']) == len(reqcoords['lon']) - # assert np.all(coords['lat'].coordinates == np.array([0, 2, 4])) - - def test_interpolation(self): - - for interpolation in ["nearest", "nearest_preview"]: - - # unstacked 1D - source = np.random.rand(5) - coords_src = Coordinates([np.linspace(0, 10, 5)], dims=["lat"]) - node = MockArrayDataSource(source=source, native_coordinates=coords_src, interpolation=interpolation) - - coords_dst = Coordinates([[1, 1.2, 1.5, 5, 9]], dims=["lat"]) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.values[0] == source[0] and output.values[1] == source[0] and output.values[2] == source[1] - - # unstacked N-D - source = np.random.rand(5, 5) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) - - node = MockArrayDataSource(source=source, native_coordinates=coords_src, interpolation=interpolation) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.values[0, 0] == source[1, 1] - - # stacked - # TODO: implement stacked handling - source = np.random.rand(5) - coords_src = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - node.interpolation = {"method": "nearest", "interpolators": [NearestNeighbor]} - coords_dst = Coordinates([(np.linspace(1, 9, 3), np.linspace(1, 9, 3))], dims=["lat_lon"]) - - with pytest.raises(InterpolationException): - output = node.eval(coords_dst) - - # TODO: implement stacked handling - # source = stacked, dest = unstacked - source = np.random.rand(5) - coords_src = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - node.interpolation = {"method": "nearest", "interpolators": [NearestNeighbor]} - coords_dst = Coordinates([np.linspace(1, 9, 3), np.linspace(1, 9, 3)], dims=["lat", "lon"]) - - with pytest.raises(InterpolationException): - output = node.eval(coords_dst) - - # TODO: implement stacked handling - # source = unstacked, dest = stacked - source = np.random.rand(5, 5) - coords_src = Coordinates([np.linspace(0, 10, 5), np.linspace(0, 10, 5)], dims=["lat", "lon"]) - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - node.interpolation = {"method": "nearest", "interpolators": [NearestNeighbor]} - coords_dst = Coordinates([(np.linspace(1, 9, 3), np.linspace(1, 9, 3))], dims=["lat_lon"]) - - with pytest.raises(InterpolationException): - output = node.eval(coords_dst) - - def test_spatial_tolerance(self): - - # unstacked 1D - source = np.random.rand(5) - coords_src = Coordinates([np.linspace(0, 10, 5)], dims=["lat"]) - node = MockArrayDataSource( - 
source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "params": {"spatial_tolerance": 1.1}}, - ) - - coords_dst = Coordinates([[1, 1.2, 1.5, 5, 9]], dims=["lat"]) - output = node.eval(coords_dst) - - print(output) - print(source) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.values[0] == source[0] and np.isnan(output.values[1]) and output.values[2] == source[1] - - def test_time_tolerance(self): - - # unstacked 1D - source = np.random.rand(5, 5) - coords_src = Coordinates( - [np.linspace(0, 10, 5), clinspace("2018-01-01", "2018-01-09", 5)], dims=["lat", "time"] - ) - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={ - "method": "nearest", - "params": {"spatial_tolerance": 1.1, "time_tolerance": np.timedelta64(1, "D")}, - }, - ) - - coords_dst = Coordinates( - [[1, 1.2, 1.5, 5, 9], clinspace("2018-01-01", "2018-01-09", 3)], dims=["lat", "time"] - ) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert ( - output.values[0, 0] == source[0, 0] - and output.values[0, 1] == source[0, 2] - and np.isnan(output.values[1, 0]) - and np.isnan(output.values[1, 1]) - and output.values[2, 0] == source[1, 0] - and output.values[2, 1] == source[1, 2] - ) - - class TestInterpolateRasterio(object): - """test interpolation functions""" - - def test_interpolate_rasterio(self): - """ regular interpolation using rasterio""" - - assert rasterio is not None - - source = np.arange(0, 15) - source.resize((3, 5)) - - coords_src = Coordinates([clinspace(0, 10, 3), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(1, 11, 3), clinspace(1, 11, 5)], dims=["lat", "lon"]) - - # try one specific rasterio case to measure output - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - node.interpolation = {"method": "min", "interpolators": [Rasterio]} - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.data[0, 3] == 3.0 - assert output.data[0, 4] == 4.0 - - node.interpolation = {"method": "max", "interpolators": [Rasterio]} - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.data[0, 3] == 9.0 - assert output.data[0, 4] == 9.0 - - node.interpolation = {"method": "bilinear", "interpolators": [Rasterio]} - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert int(output.data[0, 0]) == 1 - assert int(output.data[0, 4]) == 5 - - def test_interpolate_rasterio_descending(self): - """should handle descending""" - - source = np.random.rand(5, 5) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) - - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [Rasterio]}, - ) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert np.all(output.lon.values == coords_dst.coords["lon"]) - - class TestInterpolateScipyGrid(object): - """test 
interpolation functions""" - - def test_interpolate_scipy_grid(self): - - source = np.arange(0, 25) - source.resize((5, 5)) - - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(1, 11, 5), clinspace(1, 11, 5)], dims=["lat", "lon"]) - - # try one specific rasterio case to measure output - node = MockArrayDataSource(source=source, native_coordinates=coords_src) - node.interpolation = {"method": "nearest", "interpolators": [ScipyGrid]} - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - print(output) - assert output.data[0, 0] == 0.0 - assert output.data[0, 3] == 3.0 - assert output.data[1, 3] == 8.0 - assert np.isnan(output.data[0, 4]) # TODO: how to handle outside bounds - - node.interpolation = {"method": "cubic_spline", "interpolators": [ScipyGrid]} - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert int(output.data[0, 0]) == 2 - assert int(output.data[2, 4]) == 16 - - node.interpolation = {"method": "bilinear", "interpolators": [ScipyGrid]} - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert int(output.data[0, 0]) == 2 - assert int(output.data[3, 3]) == 20 - assert np.isnan(output.data[4, 4]) # TODO: how to handle outside bounds - - def test_interpolate_irregular_arbitrary_2dims(self): - """ irregular interpolation """ - - # try >2 dims - source = np.random.rand(5, 5, 3) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5), [2, 3, 5]], dims=["lat", "lon", "time"]) - coords_dst = Coordinates([clinspace(1, 11, 5), clinspace(1, 11, 5), [2, 3, 5]], dims=["lat", "lon", "time"]) - - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [ScipyGrid]}, - ) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert np.all(output.lon.values == coords_dst.coords["lon"]) - assert np.all(output.time.values == coords_dst.coords["time"]) - - # assert output.data[0, 0] == source[] - - def test_interpolate_irregular_arbitrary_descending(self): - """should handle descending""" - - source = np.random.rand(5, 5) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) - - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [ScipyGrid]}, - ) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert np.all(output.lon.values == coords_dst.coords["lon"]) - - def test_interpolate_irregular_arbitrary_swap(self): - """should handle descending""" - - source = np.random.rand(5, 5) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) - - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [ScipyGrid]}, - ) - output = node.eval(coords_dst) - - assert isinstance(output, 
UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert np.all(output.lon.values == coords_dst.coords["lon"]) - - def test_interpolate_irregular_lat_lon(self): - """ irregular interpolation """ - - source = np.random.rand(5, 5) - coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) - coords_dst = Coordinates([[[0, 2, 4, 6, 8, 10], [0, 2, 4, 5, 6, 10]]], dims=["lat_lon"]) - - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [ScipyGrid]}, - ) - output = node.eval(coords_dst) - - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat_lon.values == coords_dst.coords["lat_lon"]) - assert output.values[0] == source[0, 0] - assert output.values[1] == source[1, 1] - assert output.values[-1] == source[-1, -1] - - class TestInterpolateScipyPoint(object): - def test_interpolate_scipy_point(self): - """ interpolate point data to nearest neighbor with various coords_dst""" - - source = np.random.rand(6) - coords_src = Coordinates([[[0, 2, 4, 6, 8, 10], [0, 2, 4, 5, 6, 10]]], dims=["lat_lon"]) - coords_dst = Coordinates([[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]], dims=["lat_lon"]) - node = MockArrayDataSource( - source=source, - native_coordinates=coords_src, - interpolation={"method": "nearest", "interpolators": [ScipyPoint]}, - ) - - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat_lon.values == coords_dst.coords["lat_lon"]) - assert output.values[0] == source[0] - assert output.values[-1] == source[3] - - coords_dst = Coordinates([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], dims=["lat", "lon"]) - output = node.eval(coords_dst) - assert isinstance(output, UnitsDataArray) - assert np.all(output.lat.values == coords_dst.coords["lat"]) - assert output.values[0, 0] == source[0] - assert output.values[-1, -1] == source[3] diff --git a/podpac/core/data/test/test_pydap.py b/podpac/core/data/test/test_pydap.py index 5c037e0f6..d91cccef1 100644 --- a/podpac/core/data/test/test_pydap.py +++ b/podpac/core/data/test/test_pydap.py @@ -1,126 +1,98 @@ import pydap -import numpy as np import pytest -from traitlets import TraitError +import numpy as np +import traitlets as tl +import requests from podpac.core.coordinates import Coordinates, clinspace from podpac.core.units import UnitsDataArray +from podpac.core import authentication from podpac.core.data.pydap_source import PyDAP - -# Trying to fix test -pydap.client.open_url +from podpac import settings class MockPyDAP(PyDAP): """mock pydap data source """ source = "http://demo.opendap.org" - username = "username" - password = "password" - datakey = "key" + data_key = "key" + data = np.random.rand(11, 11) + + def get_coordinates(self): + return Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) - def get_native_coordinates(self): - return self.native_coordinates + def _open_url(self): + base = pydap.model.BaseType(name="key", data=self.data) + dataset = pydap.model.DatasetType(name="dataset") + dataset["key"] = base + return dataset class TestPyDAP(object): """test pydap datasource""" source = "http://demo.opendap.org" - username = "username" - password = "password" - datakey = "key" - - # mock parameters and data - data = np.random.rand(11, 11) # mocked from pydap endpoint - coordinates = Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) - - def mock_pydap(self): - def open_url(url, session=None): - 
base = pydap.model.BaseType(name="key", data=self.data) - dataset = pydap.model.DatasetType(name="dataset") - dataset["key"] = base - return dataset - - pydap.client.open_url = open_url + data_key = "key" def test_init(self): - """test basic init of class""" + node = PyDAP(source="mysource", data_key="key") - node = PyDAP(source=self.source, datakey=self.datakey, username=self.username, password=self.password) - assert isinstance(node, PyDAP) + def test_coordinates_not_implemented(self): + node = PyDAP(source="mysource", data_key="key") + with pytest.raises(NotImplementedError): + node.coordinates - node = MockPyDAP() - assert isinstance(node, MockPyDAP) - - def test_traits(self): - """ check each of the pydap traits """ - - with pytest.raises(TraitError): - PyDAP(source=5, datakey=self.datakey) + def test_keys(self): + """test return of dataset keys""" - with pytest.raises(TraitError): - PyDAP(source=self.source, datakey=5) + node = MockPyDAP() + keys = node.keys + assert "key" in keys - nodes = [PyDAP(source=self.source, datakey=self.datakey), MockPyDAP()] + def test_session(self): + """test session attribute and traitlet default """ - # TODO: in traitlets, if you already define variable, it won't enforce case on - # redefinition - with pytest.raises(TraitError): - nodes[0].username = 5 + # hostname should be the same as the source, parsed by request + node = PyDAP(source=self.source, data_key=self.data_key) + assert node.hostname == "demo.opendap.org" - with pytest.raises(TraitError): - nodes[0].password = 5 + # defaults to no auth required + assert node.auth_required == False - for node in nodes: - with pytest.raises(TraitError): - node.auth_class = "auth_class" + # session should be available + assert node.session + assert isinstance(node.session, requests.Session) - with pytest.raises(TraitError): - node.auth_session = "auth_class" + # auth required + with settings: + if "username@test.org" in settings: + del settings["username@test.org"] - with pytest.raises(TraitError): - node.dataset = [1, 2, 3] + if "password@test.org" in settings: + del settings["password@test.org"] - def test_auth_session(self): - """test auth_session attribute and traitlet default """ + node = PyDAP(source=self.source, data_key=self.data_key, hostname="test.org", auth_required=True) + assert node.hostname == "test.org" - # default to none if no username and password - node = PyDAP(source=self.source, datakey=self.datakey) - assert node.auth_session is None + # throw auth error + with pytest.raises(ValueError): + s = node.session - # default to none if no auth_class - node = PyDAP(source=self.source, datakey=self.datakey, username=self.username, password=self.password) - assert node.auth_session is None + node.set_credentials(username="user", password="pass") + assert node.session + assert isinstance(node.session, requests.Session) def test_dataset(self): - """test dataset trait """ - self.mock_pydap() - - node = PyDAP(source=self.source, datakey=self.datakey) + node = MockPyDAP() assert isinstance(node.dataset, pydap.model.DatasetType) + def test_url_error(self): + node = PyDAP(source="mysource") + with pytest.raises(RuntimeError): + node.dataset + def test_get_data(self): """test get_data function of pydap""" - self.mock_pydap() - - node = PyDAP(source=self.source, datakey=self.datakey, native_coordinates=self.coordinates) - output = node.eval(self.coordinates) - assert isinstance(output, UnitsDataArray) - assert output.values[0, 0] == self.data[0, 0] - - node = MockPyDAP(native_coordinates=self.coordinates) 
- output = node.eval(self.coordinates) - assert isinstance(output, UnitsDataArray) - - def test_native_coordinates(self): - """test native coordinates of pydap datasource""" - pass - - def test_keys(self): - """test return of dataset keys""" - self.mock_pydap() - - node = MockPyDAP(native_coordinates=self.coordinates) - keys = node.keys - assert "key" in keys + node = MockPyDAP() + output = node.eval(node.coordinates) + np.testing.assert_array_equal(output.values, node.data) diff --git a/podpac/core/data/test/test_rasterio.py b/podpac/core/data/test/test_rasterio.py index f76d3548a..4aec3da83 100644 --- a/podpac/core/data/test/test_rasterio.py +++ b/podpac/core/data/test/test_rasterio.py @@ -8,17 +8,7 @@ from podpac.core.coordinates import Coordinates from podpac.core.units import UnitsDataArray -from podpac.core.data.file import Rasterio - - -class MockRasterio(Rasterio): - """mock rasterio data source """ - - source = os.path.join(os.path.dirname(__file__), "assets/RGB.byte.tif") - band = 1 - - def get_native_coordinates(self): - return self.native_coordinates +from podpac.core.data.rasterio_source import Rasterio class TestRasterio(object): @@ -31,19 +21,6 @@ def test_init(self): """test basic init of class""" node = Rasterio(source=self.source, band=self.band) - assert isinstance(node, Rasterio) - - node = MockRasterio() - assert isinstance(node, MockRasterio) - - def test_traits(self): - """ check each of the rasterio traits """ - - with pytest.raises(TraitError): - Rasterio(source=5, band=self.band) - - with pytest.raises(TraitError): - Rasterio(source=self.source, band="test") def test_dataset(self): """test dataset attribute and trait default """ @@ -57,44 +34,43 @@ def test_dataset(self): node.close_dataset() - def test_default_native_coordinates(self): - """test default native coordinates implementations""" + def test_coordinates(self): + """test default coordinates implementations""" node = Rasterio(source=self.source) - native_coordinates = node.get_native_coordinates() - assert isinstance(native_coordinates, Coordinates) - assert len(native_coordinates["lat"]) == 718 + assert isinstance(node.coordinates, Coordinates) + assert len(node.coordinates["lat"]) == 718 def test_get_data(self): """test default get_data method""" node = Rasterio(source=self.source) - native_coordinates = node.get_native_coordinates() - output = node.eval(native_coordinates) - + output = node.eval(node.coordinates) assert isinstance(output, UnitsDataArray) - def test_band_descriptions(self): - """test band count method""" - node = Rasterio(source=self.source) - bands = node.band_descriptions - assert bands and isinstance(bands, OrderedDict) - def test_band_count(self): """test band descriptions methods""" node = Rasterio(source=self.source) - count = node.band_count - assert count and isinstance(count, int) + assert node.band_count == 3 + + def test_band_descriptions(self): + """test band count method""" + node = Rasterio(source=self.source) + assert isinstance(node.band_descriptions, OrderedDict) + assert list(node.band_descriptions.keys()) == [0, 1, 2] def test_band_keys(self): """test band keys methods""" node = Rasterio(source=self.source) - keys = node.band_keys - assert keys and isinstance(keys, dict) + assert set(node.band_keys.keys()) == { + "STATISTICS_STDDEV", + "STATISTICS_MINIMUM", + "STATISTICS_MEAN", + "STATISTICS_MAXIMUM", + } def test_get_band_numbers(self): """test band numbers methods""" node = Rasterio(source=self.source) numbers = node.get_band_numbers("STATISTICS_MINIMUM", "0") - 
assert isinstance(numbers, np.ndarray) - np.testing.assert_array_equal(numbers, np.arange(3) + 1) + np.testing.assert_array_equal(numbers, [1, 2, 3]) diff --git a/podpac/core/data/test/test_reprojected_source.py b/podpac/core/data/test/test_reprojected_source.py index 65f54e93b..9d2318a28 100644 --- a/podpac/core/data/test/test_reprojected_source.py +++ b/podpac/core/data/test/test_reprojected_source.py @@ -6,6 +6,8 @@ from podpac.core.coordinates import Coordinates, clinspace from podpac.core.units import UnitsDataArray from podpac.core.node import Node +from podpac.core.algorithm.utility import Arange +from podpac.core.data.datasource import DataSource from podpac.core.data.array_source import Array from podpac.core.data.reprojection import ReprojectedSource @@ -16,70 +18,88 @@ class TestReprojectedSource(object): TODO: this needs to be reworked with real examples """ - source = Node() data = np.random.rand(11, 11) - native_coordinates = Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) + coordinates = Coordinates([clinspace(-25, 25, 11), clinspace(-25, 25, 11)], dims=["lat", "lon"]) reprojected_coordinates = Coordinates([clinspace(-25, 50, 11), clinspace(-25, 50, 11)], dims=["lat", "lon"]) def test_init(self): """test basic init of class""" - node = ReprojectedSource(source=self.source) + node = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates) assert isinstance(node, ReprojectedSource) - def test_traits(self): - """ check each of the s3 traits """ + def test_coordinates(self): + """test coordinates""" - ReprojectedSource(source=self.source) - with pytest.raises(TraitError): - ReprojectedSource(source=5) + # source has no coordinates, just use reprojected_coordinates + node = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates) + assert node.coordinates == self.reprojected_coordinates - ReprojectedSource(source_interpolation="bilinear") - with pytest.raises(TraitError): - ReprojectedSource(source_interpolation=5) - - ReprojectedSource(reprojected_coordinates=self.reprojected_coordinates) - with pytest.raises(TraitError): - ReprojectedSource(reprojected_coordinates=5) - - def test_native_coordinates(self): - """test native coordinates""" - - # error if no source has coordinates - with pytest.raises(Exception): - node = ReprojectedSource(source=Node()) - node.native_coordinates - - # source as Node - node = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates) - assert isinstance(node.native_coordinates, Coordinates) - assert node.native_coordinates["lat"].coordinates[0] == self.reprojected_coordinates["lat"].coordinates[0] + # source has coordinates + source = Array(coordinates=self.coordinates) + node = ReprojectedSource(source=source, reprojected_coordinates=self.reprojected_coordinates) + assert node.coordinates == self.reprojected_coordinates def test_get_data(self): """test get data from reprojected source""" - datanode = Array(source=self.data, native_coordinates=self.native_coordinates) - node = ReprojectedSource(source=datanode, reprojected_coordinates=datanode.native_coordinates) - output = node.eval(node.native_coordinates) - assert isinstance(output, UnitsDataArray) + source = Array(source=self.data, coordinates=self.coordinates) + node = ReprojectedSource(source=source, reprojected_coordinates=source.coordinates) + output = node.eval(node.coordinates) - def test_base_ref(self): - """test base ref""" + def 
test_source_interpolation(self): + """test get data from reprojected source""" - node = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates) - ref = node.base_ref - assert "_reprojected" in ref + # no source_interpolation + source = Array(source=self.data, coordinates=self.coordinates, interpolation="nearest") + node = ReprojectedSource(source=source, reprojected_coordinates=self.reprojected_coordinates) + assert source.interpolation == "nearest" + assert node.source.interpolation == "nearest" + assert node.eval_source.interpolation == "nearest" + assert node.eval_source.coordinates == source.coordinates + np.testing.assert_array_equal(node.eval_source.source, source.source) + + # matching source_interpolation + source = Array(source=self.data, coordinates=self.coordinates, interpolation="nearest") + node = ReprojectedSource( + source=source, reprojected_coordinates=self.reprojected_coordinates, source_interpolation="nearest" + ) + assert source.interpolation == "nearest" + assert node.source.interpolation == "nearest" + assert node.eval_source.interpolation == "nearest" + assert node.eval_source.coordinates == source.coordinates + np.testing.assert_array_equal(node.eval_source.source, source.source) + + # non-matching source_interpolation + source = Array(source=self.data, coordinates=self.coordinates, interpolation="nearest") + node = ReprojectedSource( + source=source, reprojected_coordinates=self.reprojected_coordinates, source_interpolation="bilinear" + ) + assert source.interpolation == "nearest" + assert node.source.interpolation == "nearest" + assert node.eval_source.interpolation == "bilinear" + assert node.eval_source.coordinates == source.coordinates + np.testing.assert_array_equal(node.eval_source.source, source.source) + + # no source.interpolation to set (trigger logger warning) + source = Node() + node = ReprojectedSource( + source=source, reprojected_coordinates=self.reprojected_coordinates, source_interpolation="bilinear" + ) + + def test_interpolation_warning(self): + node = ReprojectedSource(source=Arange(), reprojected_coordinates=self.coordinates) + output = node.eval(node.coordinates) - def test_base_definition(self): - """test definition""" + def test_base_ref(self): + """test base ref""" - node = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates) - d = node.base_definition - assert d["attrs"]["reprojected_coordinates"] == self.reprojected_coordinates + node = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates) + assert "_reprojected" in node.base_ref def test_deserialize_reprojected_coordinates(self): - node1 = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates) - node2 = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates.definition) - node3 = ReprojectedSource(source=self.source, reprojected_coordinates=self.reprojected_coordinates.json) + node1 = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates) + node2 = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates.definition) + node3 = ReprojectedSource(source=Node(), reprojected_coordinates=self.reprojected_coordinates.json) assert node1.reprojected_coordinates == self.reprojected_coordinates assert node2.reprojected_coordinates == self.reprojected_coordinates diff --git a/podpac/core/data/test/test_wcs.py b/podpac/core/data/test/test_wcs.py index 
cf1522723..011cc8874 100644 --- a/podpac/core/data/test/test_wcs.py +++ b/podpac/core/data/test/test_wcs.py @@ -81,11 +81,11 @@ def test_get_capabilities_url(self): """test the capabilities url generation""" node = WCS(source=self.source) - url = node.get_capabilities_url + url = node.capabilities_url assert isinstance(url, string_types) assert node.source in url - def test_get_wcs_coordinates(self): + def test_wcs_coordinates(self): """get wcs coordinates""" import podpac.core.data.ogc @@ -128,44 +128,44 @@ def test_get_wcs_coordinates(self): node = WCS(source=self.source) with pytest.raises(Exception): - node.get_wcs_coordinates() + node.wcs_coordinates # put all dependencies back podpac.core.data.ogc.requests = requests podpac.core.data.ogc.urllib3 = urllib3 podpac.core.data.ogc.lxml = lxml - def test_get_native_coordinates(self): - """get native coordinates""" + def test_coordinates(self): + """get coordinates""" self.mock_requests() node = WCS(source=self.source) # equal to wcs coordinates when no eval coordinates - native_coordinates = node.native_coordinates + coordinates = node.coordinates wcs_coordinates = node.wcs_coordinates - assert native_coordinates == wcs_coordinates + assert coordinates == wcs_coordinates # with eval coordinates # TODO: use real eval coordinates - node._output_coordinates = native_coordinates - native_coordinates = node.native_coordinates + node._output_coordinates = coordinates + coordinates = node.coordinates - assert isinstance(native_coordinates, Coordinates) + assert isinstance(coordinates, Coordinates) # TODO: one returns monotonic, the other returns uniform - assert native_coordinates == node._output_coordinates - assert native_coordinates["lat"] - assert native_coordinates["lon"] - assert native_coordinates["time"] + assert coordinates == node._output_coordinates + assert coordinates["lat"] + assert coordinates["lon"] + assert coordinates["time"] def test_get_data(self): """get data from wcs server""" self.mock_requests() node = WCS(source=self.source) - lat = node.native_coordinates["lat"].coordinates - lon = node.native_coordinates["lon"].coordinates - time = node.native_coordinates["time"].coordinates + lat = node.coordinates["lat"].coordinates + lon = node.coordinates["lon"].coordinates + time = node.coordinates["time"].coordinates # no time notime_coordinates = Coordinates( @@ -176,7 +176,7 @@ def test_get_data(self): with pytest.raises(ValueError): output = node.eval(notime_coordinates) assert isinstance(output, UnitsDataArray) - assert output.native_coordinates["lat"][0] == node.native_coordinates["lat"][0] + assert output.coordinates["lat"][0] == node.coordinates["lat"][0] # time time_coordinates = Coordinates( diff --git a/podpac/core/data/test/test_zarr.py b/podpac/core/data/test/test_zarr.py index 64a6dadfe..672b54cf8 100644 --- a/podpac/core/data/test/test_zarr.py +++ b/podpac/core/data/test/test_zarr.py @@ -6,7 +6,7 @@ from traitlets import TraitError from podpac.core.coordinates import Coordinates -from podpac.core.data.file import Zarr +from podpac.core.data.zarr_source import Zarr class TestZarr(object): @@ -24,13 +24,13 @@ def test_dims(self): node = Zarr(source=self.path) assert node.dims == ["lat", "lon"] - def test_available_keys(self): + def test_available_data_keys(self): node = Zarr(source=self.path) - assert node.available_keys == ["a", "b"] + assert node.available_data_keys == ["a", "b"] - def test_native_coordinates(self): + def test_coordinates(self): node = Zarr(source=self.path, data_key="a") - assert 
node.native_coordinates == Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) + assert node.coordinates == Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) def test_eval(self): coords = Coordinates([0, 10], dims=["lat", "lon"]) @@ -44,7 +44,7 @@ def test_eval(self): def test_eval_multiple(self): coords = Coordinates([0, 10], dims=["lat", "lon"]) - z = Zarr(source=self.path, output_keys=["a", "b"]) + z = Zarr(source=self.path, data_key=["a", "b"]) out = z.eval(coords) assert out.dims == ("lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["a", "b"]) @@ -52,14 +52,14 @@ def test_eval_multiple(self): assert out.sel(output="b")[0, 0] == 1.0 # single output key - z = Zarr(source=self.path, output_keys=["a"]) + z = Zarr(source=self.path, data_key=["a"]) out = z.eval(coords) assert out.dims == ("lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["a"]) assert out.sel(output="a")[0, 0] == 0.0 # alternate output names - z = Zarr(source=self.path, output_keys=["a", "b"], outputs=["A", "B"]) + z = Zarr(source=self.path, data_key=["a", "b"], outputs=["A", "B"]) out = z.eval(coords) assert out.dims == ("lat", "lon", "output") np.testing.assert_array_equal(out["output"], ["A", "B"]) @@ -79,7 +79,3 @@ def test_s3(self): path = "s3://podpac-internal-test/drought_parameters.zarr" node = Zarr(source=path, data_key="d0") node.close_dataset() - - def test_used_dataset_directly(self): - dataset = zarr.open(self.path, "r") - node = Zarr(dataset=dataset, data_key="a") diff --git a/podpac/core/data/zarr_source.py b/podpac/core/data/zarr_source.py new file mode 100644 index 000000000..e44de5fe9 --- /dev/null +++ b/podpac/core/data/zarr_source.py @@ -0,0 +1,194 @@ +import os +import traitlets as tl +import numpy as np + +from lazy_import import lazy_module, lazy_class, lazy_function + +zarr = lazy_module("zarr") +zarr_open = lazy_function("zarr.convenience.open") +zarr_open_consolidated = lazy_function("zarr.convenience.open_consolidated") +zarrGroup = lazy_class("zarr.Group") + +from podpac.core.authentication import S3Mixin +from podpac.core.utils import common_doc, cached_property +from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC +from podpac.core.data.file_source import BaseFileSource, FileKeysMixin + + +class Zarr(S3Mixin, FileKeysMixin, BaseFileSource): + """Create a DataSource node using zarr. + + Attributes + ---------- + source : str + Path to the Zarr archive + file_mode : str, optional + Default is 'r'. The mode used to open the Zarr archive. Options are r, r+, w, w- or x, a. 
+ dataset : zarr.Group + The h5py file object used to read the file + coordinates : :class:`podpac.Coordinates` + {coordinates} + data_key : str, int + data key, default 'data' + lat_key : str, int + latitude coordinates key, default 'lat' + lon_key : str, int + longitude coordinates key, default 'lon' + time_key : str, int + time coordinates key, default 'time' + alt_key : str, int + altitude coordinates key, default 'alt' + crs : str + Coordinate reference system of the coordinates + cf_time : bool + decode CF datetimes + cf_units : str + units, when decoding CF datetimes + cf_calendar : str + calendar, when decoding CF datetimes + """ + + file_mode = tl.Unicode(default_value="r").tag(readonly=True) + coordinate_index_type = "slice" + + def _get_store(self): + if self.source.startswith("s3://"): + s3fs = lazy_module("s3fs") + root = self.source.strip("s3://") + s3map = s3fs.S3Map(root=root, s3=self.s3, check=False) + store = s3map + else: + store = str(self.source) # has to be a string in Python2.7 for local files + return store + + def chunk_exists(self, index=None, chunk_str=None, data_key=None, chunks=None, list_dir=[]): + """ + Test to see if a chunk exists for a particular slice. + Note: Only the start of the index is used. + + Parameters + ----------- + index: tuple(slice), optional + Default is None. A tuple of slices indicating the data that the users wants to access + chunk_str: str, optional + Default is None. A string equivalent to the filename of the chunk (.e.g. "1.0.5") + data_key: str, optional + Default is None. The data_key for the zarr array that will be queried. + chunks: list, optional + Defaut is None. The chunk structure of the zarr array. If not provided will use self.dataset[data_key].chunks + list_dir: list, optional + A list of existing paths -- used in lieu of 'exist' calls + """ + + if not data_key: + data_key = "" + + if not chunks: + if data_key: + chunks = self.dataset[data_key].chunks + else: + chunks = self.dataset.chunks + + if index: + chunk_str = ".".join([str(int(s.start // chunks[i])) for i, s in enumerate(index)]) + + if not index and not chunk_str: + raise ValueError("Either the index or chunk_str needs to be specified") + + path = os.path.join(self.source, data_key, chunk_str) + if self.source.startswith("s3:"): + path = path.replace("\\", "/") + else: + path = path.replace("/", os.sep) + if list_dir: + return path in list_dir + + if self.source.startswith("s3:"): + fs = self.s3 + else: + fs = os.path + + return fs.exists(path) + + def list_dir(self, data_key=None): + za = self.dataset + if data_key: + za = za[data_key] + else: + data_key = "" + + path = os.path.join(self.source, data_key) + if self.source.startswith("s3:"): + path = path.replace("\\", "/") + ld = ["s3://" + p for p in self.s3.ls(path)] + else: + path = path.replace("/", os.sep) + ld = [os.path.join(path, p) for p in os.listdir(path)] + + return ld + + @cached_property + def dataset(self): + store = self._get_store() + try: + # import zarr.open + # import zarr.open_consolidated + if self.file_mode == "r": + try: + return zarr_open_consolidated(store, self.file_mode) + except KeyError: + pass # No consolidated metadata available + return zarr_open(store, mode=self.file_mode) + except ValueError: + raise ValueError("No Zarr store found at path '%s'" % self.source) + + # ------------------------------------------------------------------------- + # public api methods + # ------------------------------------------------------------------------- + + @cached_property + def dims(self): + 
if not isinstance(self.data_key, list): + key = self.data_key + else: + key = self.data_key[0] + try: + return self.dataset[key].attrs["_ARRAY_DIMENSIONS"] + except: + lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} + return [lookup[key] for key in self.dataset if key in lookup] + + def _add_keys(self, base_keys): + keys = base_keys.copy() + for bk in base_keys: + try: + new_keys = [bk + "/" + k for k in self.dataset[bk].keys()] + keys.extend(new_keys) + + # Remove the group key + keys.pop(keys.index(bk)) + except AttributeError: + pass + return keys + + @cached_property + def keys(self): + keys = list(self.dataset.keys()) + full_keys = self._add_keys(keys) + while keys != full_keys: + keys = full_keys.copy() + full_keys = self._add_keys(keys) + + return full_keys + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + data = self.create_output_array(coordinates) + if not isinstance(self.data_key, list): + data[:] = self.dataset[self.data_key][coordinates_index] + else: + for key, name in zip(self.data_key, self.outputs): + data.sel(output=name)[:] = self.dataset[key][coordinates_index] + return data diff --git a/podpac/core/interpolation/__init__.py b/podpac/core/interpolation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/podpac/core/data/interpolation.py b/podpac/core/interpolation/interpolation.py similarity index 97% rename from podpac/core/data/interpolation.py rename to podpac/core/interpolation/interpolation.py index 345d16af6..f4d304286 100644 --- a/podpac/core/data/interpolation.py +++ b/podpac/core/interpolation/interpolation.py @@ -7,8 +7,8 @@ import numpy as np # podpac imports -from podpac.core.data.interpolator import Interpolator -from podpac.core.data.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyPoint, ScipyGrid +from podpac.core.interpolation.interpolator import Interpolator +from podpac.core.interpolation.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyPoint, ScipyGrid INTERPOLATION_DEFAULT = "nearest" """str : Default interpolation method used when creating a new :class:`Interpolation` class """ @@ -65,6 +65,8 @@ def load_interpolators(): # load interpolators when module is first loaded +# TODO does this really only load once? +# TODO maybe move this whole section? load_interpolators() @@ -76,22 +78,6 @@ class InterpolationException(Exception): pass -def interpolation_trait(default_value=INTERPOLATION_DEFAULT, allow_none=True, **kwargs): - """Create a new interpolation trait - - Returns - ------- - tl.Union - Union trait for an interpolation definition - """ - return tl.Union( - [tl.Dict(), tl.List(), tl.Enum(INTERPOLATION_METHODS), tl.Instance(Interpolation)], - allow_none=allow_none, - default_value=default_value, - **kwargs, - ) - - class Interpolation(object): """Create an interpolation class to handle one interpolation method per unstacked dimension. Used to interpolate data within a datasource. 
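The hunk above moves the interpolation machinery out of podpac.core.data into the new podpac.core.interpolation package and removes the interpolation_trait() factory; the hunk below adds an InterpolationTrait class (a tl.Union subclass) in its place. A minimal sketch of what that migration might look like on a node definition follows; the DataSource-style usage is an assumption for illustration and is not itself shown in this diff.

import traitlets as tl
from podpac.core.interpolation.interpolation import InterpolationTrait

class ExampleNode(tl.HasTraits):
    # before the move/removal (hypothetical old-style declaration):
    #   interpolation = interpolation_trait(default_value="nearest")
    # after, using the tl.Union subclass added in the next hunk:
    interpolation = InterpolationTrait()

# accepts the same definition types as before: str, dict, list, or Interpolation instance
node = ExampleNode(interpolation={"method": "nearest", "params": {"spatial_tolerance": 1.1}})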
@@ -514,3 +500,15 @@ def interpolate(self, source_coordinates, source_data, eval_coordinates, output_ ) return output_data + + +class InterpolationTrait(tl.Union): + default_value = INTERPOLATION_DEFAULT + + def __init__( + self, + trait_types=[tl.Dict(), tl.List(), tl.Enum(INTERPOLATION_METHODS), tl.Instance(Interpolation)], + *args, + **kwargs + ): + super(InterpolationTrait, self).__init__(trait_types=trait_types, *args, **kwargs) diff --git a/podpac/core/data/interpolator.py b/podpac/core/interpolation/interpolator.py similarity index 93% rename from podpac/core/data/interpolator.py rename to podpac/core/interpolation/interpolator.py index 079213256..13cac0cf8 100644 --- a/podpac/core/data/interpolator.py +++ b/podpac/core/interpolation/interpolator.py @@ -13,6 +13,7 @@ import numpy as np import traitlets as tl +import six # Set up logging _log = logging.getLogger(__name__) @@ -29,7 +30,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at methods_supported : list List of methods supported by the interpolator. This attribute should be defined by the implementing :class:`Interpolator`. - See :ref:`INTERPOLATION_METHODS` for list of available method strings. + See :attr:`podpac.data.INTERPOLATION_METHODS` for list of available method strings. dims_supported : list List of unstacked dimensions supported by the interpolator. This attribute should be defined by the implementing :class:`Interpolator`. @@ -92,7 +93,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at ------- (:class:`podpac.Coordinates`, list) returns the new down selected coordinates and the new associated index. These coordinates must exist - in the native coordinates of the source data + in the coordinates of the source data Raises ------ @@ -249,7 +250,7 @@ def _dim_in(self, dim, *coords, **kwargs): unstacked = kwargs.pop("unstacked", False) - if isinstance(dim, str): + if isinstance(dim, six.string_types): dim = [dim] elif not isinstance(dim, (list, tuple)): raise ValueError("`dim` input must be a str, list of str, or tuple of str") @@ -264,32 +265,6 @@ def _dim_in(self, dim, *coords, **kwargs): def _loop_helper( self, func, keep_dims, udims, source_coordinates, source_data, eval_coordinates, output_data, **kwargs ): - """Loop helper - - Parameters - ---------- - func : TYPE - Description - keep_dims : TYPE - Description - udims : TYPE - Description - source_coordinates : TYPE - Description - source_data : TYPE - Description - eval_coordinates : TYPE - Description - output_data : TYPE - Description - **kwargs - Description - - Returns - ------- - TYPE - Description - """ loop_dims = [d for d in source_data.dims if d not in keep_dims] if loop_dims: dim = loop_dims[0] @@ -312,7 +287,7 @@ def _loop_helper( tol = self.spatial_tolerance diff = np.abs(source_data.coords[dim].values - i.values) - if tol == None or diff <= tol: + if tol == None or np.any(diff <= tol): src_i = (diff).argmin() src_idx = {dim: source_data.coords[dim][src_i]} else: @@ -329,7 +304,7 @@ def _loop_helper( source_data.loc[src_idx], eval_coordinates, output_data.loc[idx], - **kwargs, + **kwargs ) else: return func(udims, source_coordinates, source_data, eval_coordinates, output_data, **kwargs) diff --git a/podpac/core/data/interpolators.py b/podpac/core/interpolation/interpolators.py similarity index 93% rename from podpac/core/data/interpolators.py rename to podpac/core/interpolation/interpolators.py index 96d8ade11..e251ba784 100644 --- a/podpac/core/data/interpolators.py +++ 
b/podpac/core/interpolation/interpolators.py @@ -22,7 +22,7 @@ scipy = None # podac imports -from podpac.core.data.interpolator import COMMON_INTERPOLATOR_DOCS, Interpolator, InterpolatorException +from podpac.core.interpolation.interpolator import COMMON_INTERPOLATOR_DOCS, Interpolator, InterpolatorException from podpac.core.units import UnitsDataArray from podpac.core.coordinates import Coordinates, UniformCoordinates1d, StackedCoordinates from podpac.core.utils import common_doc @@ -196,7 +196,7 @@ def select_coordinates(self, udims, source_coordinates, source_coordinates_index new_coords.append(c) new_coords_idx.append(idx) - return Coordinates(new_coords), tuple(new_coords_idx) + return Coordinates(new_coords, validate_crs=False), tuple(new_coords_idx) @common_doc(COMMON_INTERPOLATOR_DOCS) @@ -269,35 +269,13 @@ def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, self.interpolate, keep_dims, udims, source_coordinates, source_data, eval_coordinates, output_data ) - def get_rasterio_transform(c): - """Summary - - Parameters - ---------- - c : TYPE - Description - - Returns - ------- - TYPE - Description - """ - west, east = c["lon"].area_bounds - south, north = c["lat"].area_bounds - cols, rows = (c["lon"].size, c["lat"].size) - # print (east, west, south, north) - return transform.from_bounds(west, south, east, north, cols, rows) - with rasterio.Env(): - src_transform = get_rasterio_transform(source_coordinates) + src_transform = transform.Affine.from_gdal(*source_coordinates.geotransform) src_crs = {"init": source_coordinates.crs} # Need to make sure array is c-contiguous - if source_coordinates["lat"].is_descending: - source = np.ascontiguousarray(source_data.data) - else: - source = np.ascontiguousarray(source_data.data[::-1, :]) + source = np.ascontiguousarray(source_data.data) - dst_transform = get_rasterio_transform(eval_coordinates) + dst_transform = transform.Affine.from_gdal(*eval_coordinates.geotransform) dst_crs = {"init": eval_coordinates.crs} # Need to make sure array is c-contiguous if not output_data.data.flags["C_CONTIGUOUS"]: @@ -316,10 +294,7 @@ def get_rasterio_transform(c): dst_nodata=np.nan, resampling=getattr(Resampling, self.method), ) - if eval_coordinates["lat"].is_descending: - output_data.data[:] = destination - else: - output_data.data[:] = destination[::-1, :] + output_data.data[:] = destination return output_data diff --git a/podpac/core/interpolation/test/test_interpolation.py b/podpac/core/interpolation/test/test_interpolation.py new file mode 100644 index 000000000..9e81f4dfe --- /dev/null +++ b/podpac/core/interpolation/test/test_interpolation.py @@ -0,0 +1,335 @@ +""" +Test interpolation methods + + +""" +# pylint: disable=C0111,W0212,R0903 + +from collections import OrderedDict +from copy import deepcopy + +import pytest +import traitlets as tl +import numpy as np + +from podpac.core.units import UnitsDataArray +from podpac.core.coordinates import Coordinates +from podpac.core.interpolation.interpolation import Interpolation, InterpolationException +from podpac.core.interpolation.interpolation import ( + INTERPOLATION_METHODS, + INTERPOLATION_DEFAULT, + INTERPOLATION_METHODS_DICT, +) +from podpac.core.interpolation.interpolator import Interpolator, InterpolatorException +from podpac.core.interpolation.interpolators import NearestNeighbor, NearestPreview + + +class TestInterpolation(object): + """ Test interpolation class and support methods""" + + def test_allow_missing_modules(self): + """TODO: Allow user to be missing 
rasterio and scipy""" + pass + + def test_interpolation_methods(self): + assert len(set(INTERPOLATION_METHODS) & set(INTERPOLATION_METHODS_DICT.keys())) == len(INTERPOLATION_METHODS) + + def test_interpolator_init_type(self): + """test constructor + """ + + # should throw an error if definition is not str, dict, or Interpolator + with pytest.raises(TypeError): + Interpolation(5) + + def test_str_definition(self): + # should throw an error if string input is not one of the INTERPOLATION_METHODS + with pytest.raises(InterpolationException): + Interpolation("test") + + interp = Interpolation("nearest") + assert interp.config[("default",)] + assert isinstance(interp.config[("default",)], dict) + assert interp.config[("default",)]["method"] == "nearest" + assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) + + def test_dict_definition(self): + + # should handle a default definition without any dimensions + interp = Interpolation({"method": "nearest", "params": {"spatial_tolerance": 1}}) + assert isinstance(interp.config[("default",)], dict) + assert interp.config[("default",)]["method"] == "nearest" + assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) + assert interp.config[("default",)]["params"] == {"spatial_tolerance": 1} + + # handle string methods + interp = Interpolation({"method": "nearest", "dims": ["lat", "lon"]}) + print(interp.config) + assert isinstance(interp.config[("lat", "lon")], dict) + assert interp.config[("lat", "lon")]["method"] == "nearest" + assert isinstance(interp.config[("default",)]["interpolators"][0], Interpolator) + assert interp.config[("default",)]["params"] == {} + + # handle dict methods + + # should throw an error if method is not in dict + with pytest.raises(InterpolationException): + Interpolation([{"test": "test", "dims": ["lat", "lon"]}]) + + # should throw an error if method is not a string + with pytest.raises(InterpolationException): + Interpolation([{"method": 5, "dims": ["lat", "lon"]}]) + + # should throw an error if method is not one of the INTERPOLATION_METHODS and no interpolators defined + with pytest.raises(InterpolationException): + Interpolation([{"method": "myinter", "dims": ["lat", "lon"]}]) + + # should throw an error if params is not a dict + with pytest.raises(TypeError): + Interpolation([{"method": "nearest", "dims": ["lat", "lon"], "params": "test"}]) + + # should throw an error if interpolators is not a list + with pytest.raises(TypeError): + Interpolation([{"method": "nearest", "interpolators": "test", "dims": ["lat", "lon"]}]) + + # should throw an error if interpolators are not Interpolator classes + with pytest.raises(TypeError): + Interpolation([{"method": "nearest", "interpolators": [NearestNeighbor, "test"], "dims": ["lat", "lon"]}]) + + # should throw an error if dimension is defined twice + with pytest.raises(InterpolationException): + Interpolation([{"method": "nearest", "dims": ["lat", "lon"]}, {"method": "bilinear", "dims": ["lat"]}]) + + # should throw an error if dimension is not a list + with pytest.raises(TypeError): + Interpolation([{"method": "nearest", "dims": "lat"}]) + + # should handle standard INTEPROLATION_SHORTCUTS + interp = Interpolation([{"method": "nearest", "dims": ["lat", "lon"]}]) + assert isinstance(interp.config[("lat", "lon")], dict) + assert interp.config[("lat", "lon")]["method"] == "nearest" + assert isinstance(interp.config[("lat", "lon")]["interpolators"][0], Interpolator) + assert interp.config[("lat", "lon")]["params"] == {} + + # 
should not allow custom methods if interpolators can't support + with pytest.raises(InterpolatorException): + interp = Interpolation( + [{"method": "myinter", "interpolators": [NearestNeighbor, NearestPreview], "dims": ["lat", "lon"]}] + ) + + # should allow custom methods if interpolators can support + class MyInterp(Interpolator): + methods_supported = ["myinter"] + + interp = Interpolation([{"method": "myinter", "interpolators": [MyInterp], "dims": ["lat", "lon"]}]) + assert interp.config[("lat", "lon")]["method"] == "myinter" + assert isinstance(interp.config[("lat", "lon")]["interpolators"][0], MyInterp) + + # should allow params to be set + interp = Interpolation( + [ + { + "method": "myinter", + "interpolators": [MyInterp], + "params": {"spatial_tolerance": 5}, + "dims": ["lat", "lon"], + } + ] + ) + + assert interp.config[("lat", "lon")]["params"] == {"spatial_tolerance": 5} + + # set default equal to empty tuple + interp = Interpolation([{"method": "bilinear", "dims": ["lat"]}]) + assert interp.config[("default",)]["method"] == INTERPOLATION_DEFAULT + + # use default with override if not all dimensions are supplied + interp = Interpolation([{"method": "bilinear", "dims": ["lat"]}, "nearest"]) + assert interp.config[("default",)]["method"] == "nearest" + + # make sure default is always the last key in the ordered config dict + interp = Interpolation(["nearest", {"method": "bilinear", "dims": ["lat"]}]) + assert list(interp.config.keys())[-1] == ("default",) + + # should sort the dims keys + interp = Interpolation(["nearest", {"method": "bilinear", "dims": ["lon", "lat"]}]) + assert interp.config[("lat", "lon")]["method"] == "bilinear" + + def test_init_interpolators(self): + + # should set method + interp = Interpolation("nearest") + assert interp.config[("default",)]["interpolators"][0].method == "nearest" + + # Interpolation init should init all interpolators in the list + interp = Interpolation([{"method": "nearest", "params": {"spatial_tolerance": 1}}]) + assert interp.config[("default",)]["interpolators"][0].spatial_tolerance == 1 + + # should throw TraitErrors defined by Interpolator + with pytest.raises(tl.TraitError): + Interpolation([{"method": "nearest", "params": {"spatial_tolerance": "tol"}}]) + + # should not allow undefined params + with pytest.warns(DeprecationWarning): # eventually, Traitlets will raise an exception here + interp = Interpolation([{"method": "nearest", "params": {"myarg": 1}}]) + with pytest.raises(AttributeError): + assert interp.config[("default",)]["interpolators"][0].myarg == "tol" + + def test_select_interpolator_queue(self): + + reqcoords = Coordinates([[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"]) + srccoords = Coordinates([[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"]) + + # create a few dummy interpolators that handle certain dimensions + # (can_select is defined by default to look at dims_supported) + class TimeLat(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["time", "lat"] + + def can_select(self, udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + def can_interpolate(self, udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + class LatLon(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["lat", "lon"] + + def can_select(self, udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + def can_interpolate(self, 
udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + class Lon(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["lon"] + + def can_select(self, udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + def can_interpolate(self, udims, source_coordinates, eval_coordinates): + return self._filter_udims_supported(udims) + + # set up a strange interpolation definition + # we want to interpolate (lat, lon) first, then after (time, alt) + interp = Interpolation( + [ + {"method": "myinterp", "interpolators": [LatLon, TimeLat], "dims": ["lat", "lon"]}, + {"method": "myinterp", "interpolators": [TimeLat, Lon], "dims": ["time", "alt"]}, + ] + ) + + # default = 'nearest', which will return NearestPreview for can_select + interpolator_queue = interp._select_interpolator_queue(srccoords, reqcoords, "can_select") + assert isinstance(interpolator_queue, OrderedDict) + assert isinstance(interpolator_queue[("lat", "lon")], LatLon) + assert ("time", "alt") not in interpolator_queue and ("alt", "time") not in interpolator_queue + + # should throw an error if strict is set and not all dimensions can be handled + with pytest.raises(InterpolationException): + interp_copy = deepcopy(interp) + del interp_copy.config[("default",)] + interpolator_queue = interp_copy._select_interpolator_queue(srccoords, reqcoords, "can_select", strict=True) + + # default = Nearest, which can handle all dims for can_interpolate + interpolator_queue = interp._select_interpolator_queue(srccoords, reqcoords, "can_interpolate") + assert isinstance(interpolator_queue, OrderedDict) + assert isinstance(interpolator_queue[("lat", "lon")], LatLon) + + if ("alt", "time") in interpolator_queue: + assert isinstance(interpolator_queue[("alt", "time")], NearestNeighbor) + else: + assert isinstance(interpolator_queue[("time", "alt")], NearestNeighbor) + + def test_select_coordinates(self): + + reqcoords = Coordinates( + [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"], crs="+proj=merc +vunits=m" + ) + srccoords = Coordinates( + [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], dims=["lat", "lon", "time", "alt"], crs="+proj=merc +vunits=m" + ) + + # create a few dummy interpolators that handle certain dimensions + # (can_select is defined by default to look at dims_supported) + class TimeLat(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["time", "lat"] + + def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): + return srccoords, srccoords_idx + + class LatLon(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["lat", "lon"] + + def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): + return srccoords, srccoords_idx + + class Lon(Interpolator): + methods_supported = ["myinterp"] + dims_supported = ["lon"] + + def select_coordinates(self, udims, srccoords, srccoords_idx, reqcoords): + return srccoords, srccoords_idx + + # set up a strange interpolation definition + # we want to interpolate (lat, lon) first, then after (time, alt) + interp = Interpolation( + [ + {"method": "myinterp", "interpolators": [LatLon, TimeLat], "dims": ["lat", "lon"]}, + {"method": "myinterp", "interpolators": [TimeLat, Lon], "dims": ["time", "alt"]}, + ] + ) + + coords, cidx = interp.select_coordinates(srccoords, [], reqcoords) + + assert len(coords) == len(srccoords) + assert len(coords["lat"]) == len(srccoords["lat"]) + assert cidx == () + + def test_interpolate(self): 
+ class TestInterp(Interpolator): + dims_supported = ["lat", "lon"] + + def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, output_data): + output_data = source_data + return output_data + + # test basic functionality + reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) + srccoords = Coordinates([[0, 2, 4], [0, 3, 4]], dims=["lat", "lon"]) + srcdata = UnitsDataArray( + np.random.rand(3, 3), coords=[srccoords[c].coordinates for c in srccoords], dims=srccoords.dims + ) + outdata = UnitsDataArray( + np.zeros(srcdata.shape), coords=[reqcoords[c].coordinates for c in reqcoords], dims=reqcoords.dims + ) + + interp = Interpolation({"method": "myinterp", "interpolators": [TestInterp], "dims": ["lat", "lon"]}) + outdata = interp.interpolate(srccoords, srcdata, reqcoords, outdata) + + assert np.all(outdata == srcdata) + + # test if data is size 1 + class TestFakeInterp(Interpolator): + dims_supported = ["lat"] + + def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, output_data): + return None + + reqcoords = Coordinates([[1]], dims=["lat"]) + srccoords = Coordinates([[1]], dims=["lat"]) + srcdata = UnitsDataArray( + np.random.rand(1), coords=[srccoords[c].coordinates for c in srccoords], dims=srccoords.dims + ) + outdata = UnitsDataArray( + np.zeros(srcdata.shape), coords=[reqcoords[c].coordinates for c in reqcoords], dims=reqcoords.dims + ) + + interp = Interpolation({"method": "myinterp", "interpolators": [TestFakeInterp], "dims": ["lat", "lon"]}) + outdata = interp.interpolate(srccoords, srcdata, reqcoords, outdata) + + assert np.all(outdata == srcdata) diff --git a/podpac/core/interpolation/test/test_interpolator.py b/podpac/core/interpolation/test/test_interpolator.py new file mode 100644 index 000000000..8e7e36402 --- /dev/null +++ b/podpac/core/interpolation/test/test_interpolator.py @@ -0,0 +1,49 @@ +""" +Test interpolation methods + + +""" +# pylint: disable=C0111,W0212,R0903 + +import traitlets as tl +import numpy as np + +from podpac.core.coordinates import Coordinates, clinspace +from podpac.core.interpolation.interpolator import Interpolator + + +class TestInterpolator(object): + """Test abstract interpolator class""" + + def test_can_select(self): + class CanAlwaysSelect(Interpolator): + def can_select(self, udims, reqcoords, srccoords): + return udims + + class CanNeverSelect(Interpolator): + def can_select(self, udims, reqcoords, srccoords): + return tuple() + + interp = CanAlwaysSelect(method="method") + can_select = interp.can_select(("time", "lat"), None, None) + assert "lat" in can_select and "time" in can_select + + interp = CanNeverSelect(method="method") + can_select = interp.can_select(("time", "lat"), None, None) + assert not can_select + + def test_dim_in(self): + interpolator = Interpolator(methods_supported=["test"], method="test") + + coords = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + assert interpolator._dim_in("lat", coords) + assert interpolator._dim_in("lat", coords, unstacked=True) + assert not interpolator._dim_in("time", coords) + + coords_two = Coordinates([clinspace(0, 10, 5)], dims=["lat"]) + assert interpolator._dim_in("lat", coords, coords_two) + assert not interpolator._dim_in("lon", coords, coords_two) + + coords_three = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) + assert not interpolator._dim_in("lat", coords, coords_two, coords_three) + assert interpolator._dim_in("lat", coords, coords_two, 
coords_three, unstacked=True) diff --git a/podpac/core/interpolation/test/test_interpolators.py b/podpac/core/interpolation/test/test_interpolators.py new file mode 100644 index 000000000..48dc59181 --- /dev/null +++ b/podpac/core/interpolation/test/test_interpolators.py @@ -0,0 +1,401 @@ +""" +Test interpolation methods + + +""" +# pylint: disable=C0111,W0212,R0903 + +import pytest +import traitlets as tl +import numpy as np + +from podpac.core.utils import ArrayTrait +from podpac.core.units import UnitsDataArray +from podpac.core.coordinates import Coordinates, clinspace +from podpac.core.data.rasterio_source import rasterio +from podpac.core.data.datasource import DataSource +from podpac.core.interpolation.interpolation import Interpolation, InterpolationException +from podpac.core.interpolation.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyGrid, ScipyPoint + + +class MockArrayDataSource(DataSource): + data = ArrayTrait().tag(attr=True) + coordinates = tl.Instance(Coordinates).tag(attr=True) + + def get_data(self, coordinates, coordinates_index): + return self.create_output_array(coordinates, data=self.data[coordinates_index]) + + +class TestNearest(object): + def test_nearest_preview_select(self): + + # test straight ahead functionality + reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) + srccoords = Coordinates([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dims=["lat", "lon"]) + + interp = Interpolation("nearest_preview") + + srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) + coords, cidx = interp.select_coordinates(srccoords, srccoords_index, reqcoords) + + assert len(coords) == len(srccoords) == len(cidx) + assert len(coords["lat"]) == len(reqcoords["lat"]) + assert len(coords["lon"]) == len(reqcoords["lon"]) + assert np.all(coords["lat"].coordinates == np.array([0, 2, 4])) + + # test when selection is applied serially + # this is equivalent to above + reqcoords = Coordinates([[-0.5, 1.5, 3.5], [0.5, 2.5, 4.5]], dims=["lat", "lon"]) + srccoords = Coordinates([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dims=["lat", "lon"]) + + interp = Interpolation( + [{"method": "nearest_preview", "dims": ["lat"]}, {"method": "nearest_preview", "dims": ["lon"]}] + ) + + srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) + coords, cidx = interp.select_coordinates(srccoords, srccoords_index, reqcoords) + + assert len(coords) == len(srccoords) == len(cidx) + assert len(coords["lat"]) == len(reqcoords["lat"]) + assert len(coords["lon"]) == len(reqcoords["lon"]) + assert np.all(coords["lat"].coordinates == np.array([0, 2, 4])) + + # test when coordinates are stacked and unstacked + # TODO: how to handle stacked/unstacked coordinate asynchrony? 
+ # reqcoords = Coordinates([[-.5, 1.5, 3.5], [.5, 2.5, 4.5]], dims=['lat', 'lon']) + # srccoords = Coordinates([([0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5])], dims=['lat_lon']) + + # interp = Interpolation('nearest_preview') + + # srccoords, srccoords_index = srccoords.intersect(reqcoords, outer=True, return_indices=True) + # coords, cidx = interp.select_coordinates(reqcoords, srccoords, srccoords_index) + + # assert len(coords) == len(srcoords) == len(cidx) + # assert len(coords['lat']) == len(reqcoords['lat']) + # assert len(coords['lon']) == len(reqcoords['lon']) + # assert np.all(coords['lat'].coordinates == np.array([0, 2, 4])) + + def test_interpolation(self): + + for interpolation in ["nearest", "nearest_preview"]: + + # unstacked 1D + source = np.random.rand(5) + coords_src = Coordinates([np.linspace(0, 10, 5)], dims=["lat"]) + node = MockArrayDataSource(data=source, coordinates=coords_src, interpolation=interpolation) + + coords_dst = Coordinates([[1, 1.2, 1.5, 5, 9]], dims=["lat"]) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.values[0] == source[0] and output.values[1] == source[0] and output.values[2] == source[1] + + # unstacked N-D + source = np.random.rand(5, 5) + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) + + node = MockArrayDataSource(data=source, coordinates=coords_src, interpolation=interpolation) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.values[0, 0] == source[1, 1] + + # stacked + # TODO: implement stacked handling + source = np.random.rand(5) + coords_src = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + interpolation={"method": "nearest", "interpolators": [NearestNeighbor]}, + ) + coords_dst = Coordinates([(np.linspace(1, 9, 3), np.linspace(1, 9, 3))], dims=["lat_lon"]) + + with pytest.raises(InterpolationException): + output = node.eval(coords_dst) + + # TODO: implement stacked handling + # source = stacked, dest = unstacked + source = np.random.rand(5) + coords_src = Coordinates([(np.linspace(0, 10, 5), np.linspace(0, 10, 5))], dims=["lat_lon"]) + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + interpolation={"method": "nearest", "interpolators": [NearestNeighbor]}, + ) + coords_dst = Coordinates([np.linspace(1, 9, 3), np.linspace(1, 9, 3)], dims=["lat", "lon"]) + + with pytest.raises(InterpolationException): + output = node.eval(coords_dst) + + # TODO: implement stacked handling + # source = unstacked, dest = stacked + source = np.random.rand(5, 5) + coords_src = Coordinates([np.linspace(0, 10, 5), np.linspace(0, 10, 5)], dims=["lat", "lon"]) + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + interpolation={"method": "nearest", "interpolators": [NearestNeighbor]}, + ) + coords_dst = Coordinates([(np.linspace(1, 9, 3), np.linspace(1, 9, 3))], dims=["lat_lon"]) + + with pytest.raises(InterpolationException): + output = node.eval(coords_dst) + + def test_spatial_tolerance(self): + + # unstacked 1D + source = np.random.rand(5) + coords_src = Coordinates([np.linspace(0, 10, 5)], dims=["lat"]) + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + 
interpolation={"method": "nearest", "params": {"spatial_tolerance": 1.1}}, + ) + + coords_dst = Coordinates([[1, 1.2, 1.5, 5, 9]], dims=["lat"]) + output = node.eval(coords_dst) + + print(output) + print(source) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.values[0] == source[0] and np.isnan(output.values[1]) and output.values[2] == source[1] + + def test_time_tolerance(self): + + # unstacked 1D + source = np.random.rand(5, 5) + coords_src = Coordinates( + [np.linspace(0, 10, 5), clinspace("2018-01-01", "2018-01-09", 5)], dims=["lat", "time"] + ) + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + interpolation={ + "method": "nearest", + "params": {"spatial_tolerance": 1.1, "time_tolerance": np.timedelta64(1, "D")}, + }, + ) + + coords_dst = Coordinates([[1, 1.2, 1.5, 5, 9], clinspace("2018-01-01", "2018-01-09", 3)], dims=["lat", "time"]) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert ( + output.values[0, 0] == source[0, 0] + and output.values[0, 1] == source[0, 2] + and np.isnan(output.values[1, 0]) + and np.isnan(output.values[1, 1]) + and output.values[2, 0] == source[1, 0] + and output.values[2, 1] == source[1, 2] + ) + + +class TestInterpolateRasterio(object): + """test interpolation functions""" + + def test_interpolate_rasterio(self): + """ regular interpolation using rasterio""" + + assert rasterio is not None + + source = np.arange(0, 15) + source.resize((3, 5)) + + coords_src = Coordinates([clinspace(0, 10, 3), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(1, 11, 3), clinspace(1, 11, 5)], dims=["lat", "lon"]) + + # try one specific rasterio case to measure output + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "min", "interpolators": [Rasterio]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.data[0, 3] == 3.0 + assert output.data[0, 4] == 4.0 + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "max", "interpolators": [Rasterio]} + ) + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.data[0, 3] == 9.0 + assert output.data[0, 4] == 9.0 + + # TODO boundary should be able to use a default + node = MockArrayDataSource( + data=source, + coordinates=coords_src, + interpolation={"method": "bilinear", "interpolators": [Rasterio]}, + boundary={"lat": 2.5, "lon": 1.25}, + ) + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + np.testing.assert_allclose( + output, [[1.4, 2.4, 3.4, 4.4, 5.0], [6.4, 7.4, 8.4, 9.4, 10.0], [10.4, 11.4, 12.4, 13.4, 14.0]] + ) + + def test_interpolate_rasterio_descending(self): + """should handle descending""" + + source = np.random.rand(5, 5) + coords_src = Coordinates([clinspace(10, 0, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [Rasterio]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) 
+ assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert np.all(output.lon.values == coords_dst.coords["lon"]) + + +class TestInterpolateScipyGrid(object): + """test interpolation functions""" + + def test_interpolate_scipy_grid(self): + + source = np.arange(0, 25) + source.resize((5, 5)) + + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(1, 11, 5), clinspace(1, 11, 5)], dims=["lat", "lon"]) + + # try one specific rasterio case to measure output + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + print(output) + assert output.data[0, 0] == 0.0 + assert output.data[0, 3] == 3.0 + assert output.data[1, 3] == 8.0 + assert np.isnan(output.data[0, 4]) # TODO: how to handle outside bounds + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "cubic_spline", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert int(output.data[0, 0]) == 2 + assert int(output.data[2, 4]) == 16 + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "bilinear", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert int(output.data[0, 0]) == 2 + assert int(output.data[3, 3]) == 20 + assert np.isnan(output.data[4, 4]) # TODO: how to handle outside bounds + + def test_interpolate_irregular_arbitrary_2dims(self): + """ irregular interpolation """ + + # try >2 dims + source = np.random.rand(5, 5, 3) + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5), [2, 3, 5]], dims=["lat", "lon", "time"]) + coords_dst = Coordinates([clinspace(1, 11, 5), clinspace(1, 11, 5), [2, 3, 5]], dims=["lat", "lon", "time"]) + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert np.all(output.lon.values == coords_dst.coords["lon"]) + assert np.all(output.time.values == coords_dst.coords["time"]) + + # assert output.data[0, 0] == source[] + + def test_interpolate_irregular_arbitrary_descending(self): + """should handle descending""" + + source = np.random.rand(5, 5) + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(2, 12, 5), clinspace(2, 12, 5)], dims=["lat", "lon"]) + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert np.all(output.lon.values == coords_dst.coords["lon"]) + + def test_interpolate_irregular_arbitrary_swap(self): + """should handle descending""" + + source = np.random.rand(5, 5) + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([clinspace(2, 12, 
5), clinspace(2, 12, 5)], dims=["lat", "lon"]) + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert np.all(output.lon.values == coords_dst.coords["lon"]) + + def test_interpolate_irregular_lat_lon(self): + """ irregular interpolation """ + + source = np.random.rand(5, 5) + coords_src = Coordinates([clinspace(0, 10, 5), clinspace(0, 10, 5)], dims=["lat", "lon"]) + coords_dst = Coordinates([[[0, 2, 4, 6, 8, 10], [0, 2, 4, 5, 6, 10]]], dims=["lat_lon"]) + + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyGrid]} + ) + output = node.eval(coords_dst) + + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat_lon.values == coords_dst.coords["lat_lon"]) + assert output.values[0] == source[0, 0] + assert output.values[1] == source[1, 1] + assert output.values[-1] == source[-1, -1] + + +class TestInterpolateScipyPoint(object): + def test_interpolate_scipy_point(self): + """ interpolate point data to nearest neighbor with various coords_dst""" + + source = np.random.rand(6) + coords_src = Coordinates([[[0, 2, 4, 6, 8, 10], [0, 2, 4, 5, 6, 10]]], dims=["lat_lon"]) + coords_dst = Coordinates([[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]], dims=["lat_lon"]) + node = MockArrayDataSource( + data=source, coordinates=coords_src, interpolation={"method": "nearest", "interpolators": [ScipyPoint]} + ) + + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat_lon.values == coords_dst.coords["lat_lon"]) + assert output.values[0] == source[0] + assert output.values[-1] == source[3] + + coords_dst = Coordinates([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], dims=["lat", "lon"]) + output = node.eval(coords_dst) + assert isinstance(output, UnitsDataArray) + assert np.all(output.lat.values == coords_dst.coords["lat"]) + assert output.values[0, 0] == source[0] + assert output.values[-1, -1] == source[3] diff --git a/podpac/core/managers/aws.py b/podpac/core/managers/aws.py index 432e8e9c8..a1b099387 100644 --- a/podpac/core/managers/aws.py +++ b/podpac/core/managers/aws.py @@ -11,6 +11,8 @@ import base64 from datetime import datetime +from six import string_types + import boto3 import botocore import traitlets as tl @@ -36,32 +38,34 @@ class LambdaException(Exception): class Lambda(Node): """A `Node` wrapper to evaluate source on AWS Lambda function - + Attributes ---------- aws_access_key_id : str, optional - Access key id from AWS credentials. If :attr:`session` is provided, this attribute will be ignored. Overrides :dict:`podpac.settings`. + Access key id from AWS credentials. If :attr:`session` is provided, this attribute will be ignored. Overrides :attr:`podpac.settings`. aws_region_name : str, optional - Name of the AWS region. If :attr:`session` is provided, this attribute will be ignored. Overrides :dict:`podpac.settings`. + Name of the AWS region. If :attr:`session` is provided, this attribute will be ignored. Overrides :attr:`podpac.settings`. aws_secret_access_key : str - Access key value from AWS credentials. If :attr:`session` is provided, this attribute will be ignored. Overrides :dict:`podpac.settings`. + Access key value from AWS credentials. If :attr:`session` is provided, this attribute will be ignored. Overrides :attr:`podpac.settings`. 
function_name : str, optional - Name of the lambda function to use or create. Defaults to :str:`podpac.settings["FUNCTION_NAME"]` or "podpac-lambda-autogen". + Name of the lambda function to use or create. Defaults to :attr:`podpac.settings["FUNCTION_NAME"]` or "podpac-lambda-autogen". function_timeout : int, optional Timeout of the lambda function, in seconds. Defaults to 600. function_triggers : list of str, optional Methods to trigger this function. May only include ["eval", "S3", "APIGateway"]. During the :meth:`self.build()` process, this list will determine which AWS resources are linked to Lambda function. Defaults to ["eval"]. function_role_name : str, optional - Name of the AWS role created for lambda function. Defaults to :str:`podpac.settings["FUNCTION_ROLE_NAME"]` or "podpac-lambda-autogen". + Name of the AWS role created for lambda function. Defaults to :attr:`podpac.settings["FUNCTION_ROLE_NAME"]` or "podpac-lambda-autogen". function_s3_bucket : str, optional - S3 bucket name to use with lambda function. Defaults to :str:`podpac.settings["S3_BUCKET_NAME"]` or "podpac-autogen-" with the timestamp to ensure uniqueness. + S3 bucket name to use with lambda function. Defaults to :attr:`podpac.settings["S3_BUCKET_NAME"]` or "podpac-autogen-" with the timestamp to ensure uniqueness. eval_settings : dict, optional Default is podpac.settings. PODPAC settings that will be used to evaluate the Lambda function. + eval_timeout : float, optional + Default is None. The amount of time to wait for an eval to return. To get near asynchronous response, set this to a small number. Other Attributes ---------------- - attrs : dict + node_attrs : dict Additional attributes passed on to the Lambda definition of the base node download_result : Bool Flag that indicated whether node should wait to download the data. @@ -119,16 +123,16 @@ class Lambda(Node): function_source_dist_zip : str, optional Override :attr:`self.function_source_dist_key` and create lambda function using custom source podpac dist archive to :attr:`self.function_s3_bucket` during :meth:`self.build()` process. function_tags : dict, optional - AWS Tags for Lambda function resource. Defaults to :dict:`podpac.settings["AWS_TAGS"]` or {}. + AWS Tags for Lambda function resource. Defaults to :attr:`podpac.settings["AWS_TAGS"]` or {}. function_budget_amount : float, optional EXPERIMENTAL FEATURE Monthly budget for function and associated AWS resources. When usage reaches 80% of this amount, AWS will notify :attr:`function_budget_email`. - Defaults to :str:`podpac.settings["AWS_BUDGET_AMOUNT"]`. + Defaults to :attr:`podpac.settings["AWS_BUDGET_AMOUNT"]`. function_budget_email : str, optional EXPERIMENTAL FEATURE Email to notify when usage reaches 80% of :attr:`function_budget_amount`. - Defaults to :str:`podpac.settings["AWS_BUDGET_EMAIL"]`. + Defaults to :attr:`podpac.settings["AWS_BUDGET_EMAIL"]`. function_budget_name : str, optional EXPERIMENTAL FEATURE Name for AWS budget @@ -138,6 +142,8 @@ class Lambda(Node): Defaults to "USD". See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/budgets.html#Budgets.Client.create_budget for currency (or Unit) options. + output_format : dict, optional + Definition for how output is saved after results are computed. session : :class:`podpac.managers.aws.Session` AWS Session to use for this node. 
source : :class:`podpac.Node` @@ -164,7 +170,7 @@ def _session_default(self): ) # general function parameters - function_eval_trigger = tl.Enum(["eval", "S3", "APIGateway"], default_value="eval").tag(attr=True, readonly=True) + function_eval_trigger = tl.Enum(["eval", "S3", "APIGateway"], default_value="eval").tag(attr=True) # lambda function parameters function_name = tl.Unicode().tag(attr=True, readonly=True) # see default below @@ -184,7 +190,7 @@ def _session_default(self): function_source_bucket = tl.Unicode(default_value="podpac-dist", allow_none=True).tag(readonly=True) function_source_dist_key = tl.Unicode().tag(readonly=True) # see default below function_source_dependencies_key = tl.Unicode().tag(readonly=True) # see default below - function_allow_unsafe_eval = tl.Bool(default_value=False).tag(readonly=True) + function_allow_unsafe_eval = tl.Bool().tag(readonly=True) # see default below function_restrict_pipelines = tl.List(tl.Unicode(), default_value=[]).tag(readonly=True) _function_arn = tl.Unicode(default_value=None, allow_none=True) _function_last_modified = tl.Unicode(default_value=None, allow_none=True) @@ -194,6 +200,12 @@ def _session_default(self): _function_valid = tl.Bool(default_value=False, allow_none=True) _function = tl.Dict(default_value=None, allow_none=True) # raw response from AWS on "get_" + output_format = tl.Dict(None, allow_none=True).tag(attr=True) + + @property + def outputs(self): + return self.source.outputs + @tl.default("function_name") def _function_name_default(self): if settings["FUNCTION_NAME"] is None: @@ -228,6 +240,12 @@ def _function_source_dependencies_key_default(self): def _function_tags_default(self): return settings["AWS_TAGS"] or {} + @tl.default("function_allow_unsafe_eval") + def _function_allow_unsafe_eval_default(self): + return "UNSAFE_EVAL_HASH" in self.eval_settings and isinstance( + self.eval_settings["UNSAFE_EVAL_HASH"], string_types + ) + # role parameters function_role_name = tl.Unicode().tag(readonly=True) # see default below function_role_description = tl.Unicode(default_value="PODPAC Lambda Role").tag(readonly=True) @@ -378,10 +396,11 @@ def _function_budget_name_default(self): source = tl.Instance(Node, allow_none=True).tag(attr=True) source_output_format = tl.Unicode(default_value="netcdf") source_output_name = tl.Unicode() - attrs = tl.Dict() + node_attrs = tl.Dict() download_result = tl.Bool(True).tag(attr=True) force_compute = tl.Bool().tag(attr=True) eval_settings = tl.Dict().tag(attr=True) + eval_timeout = tl.Float(610).tag(attr=True) @tl.default("source_output_name") def _source_output_name_default(self): @@ -405,9 +424,9 @@ def pipeline(self): """ d = OrderedDict() d["pipeline"] = self.source.definition - if self.attrs: + if self.node_attrs: out_node = next(reversed(d["pipeline"].keys())) - d["pipeline"][out_node]["attrs"].update(self.attrs) + d["pipeline"][out_node]["attrs"].update(self.node_attrs) d["output"] = {"format": self.source_output_format} d["settings"] = self.eval_settings return d @@ -1393,26 +1412,44 @@ def _create_eval_pipeline(self, coordinates): pipeline["settings"][ "FUNCTION_DEPENDENCIES_KEY" ] = self.function_s3_dependencies_key # overwrite in case this is specified explicitly by class + if self.output_format: + pipeline["output"] = self.output_format return pipeline def _eval_invoke(self, coordinates, output=None): """eval node through invoke trigger""" - _log.debug("Evaluating pipeline via invoke") # create eval pipeline pipeline = self._create_eval_pipeline(coordinates) # create lambda client 
- awslambda = self.session.client("lambda") + config = botocore.config.Config( + read_timeout=self.eval_timeout, max_pool_connections=1001, retries={"max_attempts": 0} + ) + awslambda = self.session.client("lambda", config=config) - # invoke + # pipeline payload payload = bytes(json.dumps(pipeline, indent=4, cls=JSONEncoder).encode("UTF-8")) - response = awslambda.invoke( - FunctionName=self.function_name, - LogType="Tail", # include the execution log in the response. - Payload=payload, - ) + + if self.download_result: + _log.debug("Evaluating pipeline via invoke synchronously") + response = awslambda.invoke( + FunctionName=self.function_name, + LogType="Tail", # include the execution log in the response. + Payload=payload, + ) + else: + # async invocation + _log.debug("Evaluating pipeline via invoke asynchronously") + awslambda.invoke( + FunctionName=self.function_name, + InvocationType="Event", + LogType="Tail", # include the execution log in the response. + Payload=payload, + ) + + return _log.debug("Received response from lambda function") @@ -1428,7 +1465,11 @@ def _eval_invoke(self, coordinates, output=None): # After waiting, load the pickle file like this: payload = response["Payload"].read() - self._output = UnitsDataArray.open(payload) + try: + self._output = UnitsDataArray.open(payload) + except ValueError: + # Not actually a data-array, returning a string instead + return payload.decode("utf-8") return self._output def _eval_s3(self, coordinates, output=None): @@ -1708,7 +1749,7 @@ def put_object(session, bucket_name, bucket_path, file=None, object_acl="private object_config = {"ACL": object_acl, "Bucket": bucket_name, "Key": bucket_path} object_body = None - if isinstance(file, str): + if isinstance(file, string_types): with open(file, "rb") as f: object_body = f.read() else: diff --git a/podpac/core/managers/aws_lambda.py b/podpac/core/managers/aws_lambda.py deleted file mode 100644 index 72f0a2b44..000000000 --- a/podpac/core/managers/aws_lambda.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -:deprecated: See `aws` module -""" - -import warnings - -warnings.warn( - "The `aws_lambda` module is deprecated and will be removed in podpac 2.0. See the `aws` module " - "for AWS management utilites", - DeprecationWarning, -) -from podpac.core.managers.aws import * diff --git a/podpac/core/managers/multi_process.py b/podpac/core/managers/multi_process.py new file mode 100644 index 000000000..4cbbf01b0 --- /dev/null +++ b/podpac/core/managers/multi_process.py @@ -0,0 +1,74 @@ +from __future__ import division, unicode_literals, print_function, absolute_import + +import sys + +from multiprocessing import Process as mpProcess +from multiprocessing import Queue +import traitlets as tl +import logging + +from podpac.core.node import Node +from podpac.core.utils import NodeTrait +from podpac.core.coordinates import Coordinates +from podpac.core.settings import settings + +# Set up logging +_log = logging.getLogger(__name__) + + +def _f(definition, coords, q, outputkw): + try: + n = Node.from_json(definition) + c = Coordinates.from_json(coords) + o = n.eval(c) + o.serialize() + _log.debug("o.shape: {}, output_format: {}".format(o.shape, outputkw)) + if outputkw: + _log.debug("Saving output results to output format {}".format(outputkw)) + o = o.to_format(outputkw["format"], **outputkw.get("format_kwargs")) + q.put(o) + except Exception as e: + q.put(str(e)) + + +class Process(Node): + """ + Source node will be evaluated in another process, and it is blocking! 
+ """ + + source = NodeTrait().tag(attr=True) + output_format = tl.Dict(None, allow_none=True).tag(attr=True) + timeout = tl.Int(None, allow_none=True) + block = tl.Bool(True) + + @property + def outputs(self): + return self.source.outputs + + def eval(self, coordinates, output=None): + definition = self.source.json + coords = coordinates.json + + q = Queue() + process = mpProcess(target=_f, args=(definition, coords, q, self.output_format)) + process.daemon = True + _log.debug("Starting process.") + process.start() + _log.debug("Retrieving data from queue.") + o = q.get(timeout=self.timeout, block=self.block) + _log.debug("Joining.") + process.join() # This is blocking! + _log.debug("Closing.") + if (sys.version_info.major + sys.version_info.minor / 10.0) >= 3.7: + process.close() # New in version Python 3.7 + if isinstance(o, str): + raise Exception(o) + if o is None: + return + o.deserialize() + if output is not None: + output[:] = o.data[:] + else: + output = o + + return output diff --git a/podpac/core/managers/parallel.py b/podpac/core/managers/parallel.py new file mode 100644 index 000000000..54a16a8c5 --- /dev/null +++ b/podpac/core/managers/parallel.py @@ -0,0 +1,414 @@ +""" +Module to help farm out computation to multiple workers and save the results in a zarr file. +""" + +from __future__ import division, unicode_literals, print_function, absolute_import + +import time +import logging +import traitlets as tl +import numpy as np + +from multiprocessing.pool import ThreadPool + +from podpac.core.managers.multi_threading import Lock +from podpac.core.node import Node +from podpac.core.utils import NodeTrait +from podpac.core.data.zarr_source import Zarr +from podpac.core.coordinates import Coordinates, merge_dims + +# Optional dependencies +from lazy_import import lazy_module, lazy_class + +zarr = lazy_module("zarr") +zarrGroup = lazy_class("zarr.Group") +botocore = lazy_module("botocore") + +# Set up logging +_log = logging.getLogger(__name__) + + +class Parallel(Node): + """ + This class launches the parallel node evaluations in separate threads. As such, the node does not need to return + immediately (i.e. does NOT have to be asynchronous). For asynchronous nodes + (i.e. aws.Lambda with download_result=False) use ParrallelAsync + + Attributes + ----------- + chunks: dict + Dictionary of dimensions and sizes that will be iterated over. If a dimension is not in this dictionary, the + size of the eval coordinates will be used for the chunk. In this case, it may not be possible to automatically + set the coordinates of missing dimensions in the final file. + fill_output: bool + Default is True. When True, the final results will be assembled and returned to the user. If False, the final + results should be written to a file by specifying the output_format in a Process or Lambda node. + See note below. + source: podpac.Node + The source dataset for the computation + number_of_workers: int + Default is 1. Number of parallel process workers at one time. + start_i: int, optional + Default is 0. Starting chunk. This allow you to restart a run without having to check/submit 1000's of workers + before getting back to where you were. Empty chunks make the submission slower. + + Notes + ------ + In some cases where the input and output coordinates of the source node is not the same (such as reduce nodes) + and fill_output is True, the user may need to specify 'output' as part of the eval call. 
+ """ + + _repr_keys = ["source", "number_of_workers", "chunks"] + source = NodeTrait().tag(attr=True) + chunks = tl.Dict().tag(attr=True) + fill_output = tl.Bool(True).tag(attr=True) + number_of_workers = tl.Int(1).tag(attr=True) + _lock = Lock() + errors = tl.List() + start_i = tl.Int(0) + + def eval(self, coordinates, output=None): + # Make a thread pool to manage queue + pool = ThreadPool(processes=self.number_of_workers) + + if output is None and self.fill_output: + output = self.create_output_array(coordinates) + + shape = [] + for d in coordinates.dims: + if d in self.chunks: + shape.append(self.chunks[d]) + else: + shape.append(coordinates[d].size) + + results = [] + # inputs = [] + i = 0 + for coords, slc in coordinates.iterchunks(shape, True): + # inputs.append(coords) + if i < self.start_i: + _log.debug("Skipping {} since it is less than self.start_i ({})".format(i, self.start_i)) + i += 1 + continue + + out = None + if self.fill_output and output is not None: + out = output[slc] + with self._lock: + _log.debug("Added {} to worker pool".format(i)) + _log.debug("Node eval with coords: {}, {}".format(slc, coords)) + results.append(pool.apply_async(self.eval_source, [coords, slc, out, i])) + i += 1 + + _log.info("Added all chunks to worker pool. Now waiting for results.") + start_time = time.time() + for i, res in enumerate(results): + # _log.debug('Waiting for results: {} {}'.format(i, inputs[i])) + dt = str(np.timedelta64(int(1000 * (time.time() - start_time)), "ms").astype(object)) + _log.info("({}): Waiting for results: {} / {}".format(dt, i + 1, len(results))) + + # Try to get the results / wait for the results + try: + o, slc = res.get() + except Exception as e: + o = None + slc = None + self.errors.append((i, res, e)) + dt = str(np.timedelta64(int(1000 * (time.time() - start_time)), "ms").astype(object)) + _log.warning("({}) {} failed with exception {}".format(dt, i, e)) + + dt = str(np.timedelta64(int(1000 * (time.time() - start_time)), "ms").astype(object)) + _log.info("({}) Finished result: {} / {}".format(time.time() - start_time, i + 1, len(results))) + + # Fill output + if self.fill_output: + if output is None: + missing_dims = [d for d in coordinates.dims if d not in self.chunks.keys()] + coords = coordinates.drop(missing_dims) + missing_coords = Coordinates.from_xarray(o).drop(list(self.chunks.keys())) + coords = merge_dims([coords, missing_coords]) + coords = coords.transpose(*coordinates.dims) + output = self.create_output_array(coords) + output[slc] = o + + _log.info("Completed parallel execution.") + pool.close() + + return output + + def eval_source(self, coordinates, coordinates_index, out, i, source=None): + if source is None: + source = self.source + # Make a copy to prevent any possibility of memory corruption + source = Node.from_definition(source.definition) + + _log.info("Submitting source {}".format(i)) + return (source.eval(coordinates, output=out), coordinates_index) + + +class ParallelAsync(Parallel): + """ + This class launches the parallel node evaluations in threads up to n_workers, and expects the node.eval to return + quickly for parallel execution. This Node was written with aws.Lambda(eval_timeout=1.25) Nodes in mind. + + Users can implement the `check_worker_available` method or specify the `no_worker_exception` attribute, which is an + exception thrown if workers are not available. + + Attributes + ----------- + chunks: dict + Dictionary of dimensions and sizes that will be iterated over. 
If a dimension is not in this dictionary, the + size of the eval coordinates will be used for the chunk. In this case, it may not be possible to automatically + set the coordinates of missing dimensions in the final file. + fill_output: bool + Default is True. When True, the final results will be assembled and returned to the user. If False, the final + results should be written to a file by specifying the output_format in a Process or Lambda node. + See note below. + source: podpac.Node + The source dataset for the computation. + sleep_time: float + Default is 1 second. Number of seconds to sleep between trying to submit new workers. + no_worker_exception: Exception, optional + Default is botocore.exceptions.ClientError. Exception class used to identify when a submission failed due to no available workers. The default + is chosen to work with the podpac.managers.Lambda node. + async_exception: Exception + Default is botocore.exceptions.ReadTimeoutError. This is the exception thrown by the async function in case + it times out waiting for a return. In that case, the timeout is treated as a success. The default is chosen to work with the + podpac.managers.Lambda node. + Notes + ------ + In some cases where the input and output coordinates of the source node are not the same (such as reduce nodes) + and fill_output is True, the user may need to specify 'output' as part of the eval call. + """ + + source = NodeTrait().tag(attr=True) + chunks = tl.Dict().tag(attr=True) + fill_output = tl.Bool(True).tag(attr=True) + sleep_time = tl.Float(1).tag(attr=True) + no_worker_exception = tl.Type(botocore.exceptions.ClientError).tag(attr=True) + async_exception = tl.Type(botocore.exceptions.ReadTimeoutError).tag(attr=True) + + def check_worker_available(self): + return True + + def eval_source(self, coordinates, coordinates_index, out, i, source=None): + if source is None: + source = self.source + # Make a copy to prevent any possibility of memory corruption + source = Node.from_definition(source.definition) + + success = False + o = None + while not success: + if self.check_worker_available(): + try: + o = source.eval(coordinates, out) + success = True + except self.async_exception: + # This exception is fine and constitutes a success + o = None + success = True + except self.no_worker_exception as e: + response = e.response + if not (response and response.get("Error", {}).get("Code") == "TooManyRequestsException"): + raise e # Re-raise; this is not the expected error + _log.debug("Worker {} exception {}".format(i, e)) + success = False + time.sleep(self.sleep_time) + else: + _log.debug("Worker unavailable for {}".format(i)) + time.sleep(self.sleep_time) + _log.info("Submitting source {}".format(i)) + return (o, coordinates_index) + + +class ZarrOutputMixin(tl.HasTraits): + """ + This class assumes that the node has an 'output_format' attribute + (currently the "Lambda" Node, and the "Process" Node) + + Attributes + ----------- + zarr_file: str + Path to the output zarr file that collects all of the computed results. This can reside on S3. + dataset: ZarrGroup + A handle to the zarr group pointing to the output file. + fill_output: bool, optional + Default is False (unlike parent class). If True, will collect the output data and return it as an xarray. + init_file_mode: str, optional + Default is 'a'. Mode used for initializing the zarr file. + zarr_chunks: dict + Size of the chunks in the zarr file for each dimension. + zarr_shape: dict, optional + Default is {coordinates.dims: coordinates.shape}, using the coordinates passed to the eval call. 
This + does not need to be specified unless the Node modifies the input coordinates (as part of a Reduce operation, + for example). The result can be incorrect and requires care/checking by the user. + zarr_coordinates: podpac.Coordinates, optional + Default is None. If the node modifies the shape of the input coordinates, this allows users to set the + coordinates in the output zarr file. This can be incorrect and requires care by the user. + skip_existing: bool + Default is False. If true, this will check to see if the results already exist. And if so, it will not + submit a job for that particular coordinate evaluation. This assumes self.chunks == self.zar_chunks + list_dir: bool, optional + Default is False. If skip_existing is True, by default existing files are checked by asking for an 'exists' call. + If list_dir is True, then at the first opportunity a "list_dir" is performed on the directory and the results + are cached. + """ + + zarr_file = tl.Unicode().tag(attr=True) + dataset = tl.Any() + zarr_node = NodeTrait() + zarr_data_key = tl.Union([tl.Unicode(), tl.List()]) + fill_output = tl.Bool(False) + init_file_mode = tl.Unicode("a").tag(attr=True) + zarr_chunks = tl.Dict(default_value=None, allow_none=True).tag(attr=True) + zarr_shape = tl.Dict(allow_none=True, default_value=None).tag(attr=True) + zarr_coordinates = tl.Instance(Coordinates, allow_none=True, default_value=None).tag(attr=True) + zarr_dtype = tl.Unicode("f4") + skip_existing = tl.Bool(True).tag(attr=True) + list_dir = tl.Bool(False) + _list_dir = tl.List(allow_none=True, default_value=[]) + _shape = tl.Tuple() + _chunks = tl.List() + aws_client_kwargs = tl.Dict() + aws_config_kwargs = tl.Dict() + + def eval(self, coordinates, output=None): + if self.zarr_shape is None: + self._shape = coordinates.shape + else: + self._shape = tuple(self.zarr_shape.values()) + + # initialize zarr file + if self.zarr_chunks is None: + chunks = [self.chunks[d] for d in coordinates] + else: + chunks = [self.zarr_chunks[d] for d in coordinates] + self._chunks = chunks + zf, data_key, zn = self.initialize_zarr_array(self._shape, chunks) + self.dataset = zf + self.zarr_data_key = data_key + self.zarr_node = zn + zn.keys + + # eval + _log.debug("Starting parallel eval.") + missing_dims = [d for d in coordinates.dims if d not in self.chunks.keys()] + if self.zarr_coordinates is not None: + missing_dims = missing_dims + [d for d in self.zarr_coordinates.dims if d not in missing_dims] + set_coords = merge_dims([coordinates.drop(missing_dims), self.zarr_coordinates]) + else: + set_coords = coordinates.drop(missing_dims) + set_coords.transpose(*coordinates.dims) + + self.set_zarr_coordinates(set_coords, data_key) + if self.list_dir: + dk = data_key + if isinstance(dk, list): + dk = dk[0] + self._list_dir = self.zarr_node.list_dir(dk) + + output = super(ZarrOutputMixin, self).eval(coordinates, output) + + # fill in the coordinates, this is guaranteed to be correct even if the user messed up. 
+ if output is not None: + self.set_zarr_coordinates(Coordinates.from_xarray(output.coords), data_key) + else: + return zf + + return output + + def set_zarr_coordinates(self, coordinates, data_key): + # Fill in metadata + for dk in data_key: + self.dataset[dk].attrs["_ARRAY_DIMENSIONS"] = coordinates.dims + for d in coordinates.dims: + # TODO ADD UNITS AND TIME DECODING INFORMATION + self.dataset.create_dataset(d, shape=coordinates[d].size, overwrite=True) + self.dataset[d][:] = coordinates[d].coordinates + + def initialize_zarr_array(self, shape, chunks): + _log.debug("Creating Zarr file.") + zn = Zarr(source=self.zarr_file, file_mode=self.init_file_mode, aws_client_kwargs=self.aws_client_kwargs) + if self.source.output or getattr(self.source, "data_key", None): + data_key = self.source.output + if data_key is None: + data_key = self.source.data_key + if not isinstance(data_key, list): + data_key = [data_key] + elif self.source.outputs: # If someone restricted the outputs for this node, we need to know + data_key = [dk for dk in data_key if dk in self.source.outputs] + elif self.source.outputs: + data_key = self.source.outputs + else: + data_key = ["data"] + + zf = zarr.open(zn._get_store(), mode=self.init_file_mode) + + # Intialize the output zarr arrays + for dk in data_key: + try: + arr = zf.create_dataset( + dk, + shape=shape, + chunks=chunks, + fill_value=np.nan, + dtype=self.zarr_dtype, + overwrite=not self.skip_existing, + ) + except ValueError: + pass # Dataset already exists + + # Recompute any cached properties + zn = Zarr(source=self.zarr_file, file_mode=self.init_file_mode, aws_client_kwargs=self.aws_client_kwargs) + return zf, data_key, zn + + def eval_source(self, coordinates, coordinates_index, out, i, source=None): + if source is None: + source = self.source + + if self.skip_existing: # This section allows previously computed chunks to be skipped + dk = self.zarr_data_key + if isinstance(dk, list): + dk = dk[0] + try: + exists = self.zarr_node.chunk_exists( + coordinates_index, data_key=dk, list_dir=self._list_dir, chunks=self._chunks + ) + except ValueError as e: # This was needed in cases where a poor internet connection caused read errors + exists = False + if exists: + _log.info("Skipping {} (already exists)".format(i)) + return out, coordinates_index + + # Make a copy to prevent any possibility of memory corruption + source = Node.from_definition(source.definition) + _log.debug("Creating output format.") + output = dict( + format="zarr_part", + format_kwargs=dict( + part=[[s.start, min(s.stop, self._shape[i]), s.step] for i, s in enumerate(coordinates_index)], + source=self.zarr_file, + mode="a", + ), + ) + _log.debug("Finished creating output format.") + + if source.has_trait("output_format"): + source.set_trait("output_format", output) + _log.debug("output: {}, coordinates.shape: {}".format(output, coordinates.shape)) + _log.debug("Evaluating node.") + + o, slc = super(ZarrOutputMixin, self).eval_source(coordinates, coordinates_index, out, i, source) + + if not source.has_trait("output_format"): + o.to_format(output["format"], **output["format_kwargs"]) + return o, slc + + +class ParallelOutputZarr(ZarrOutputMixin, Parallel): + pass + + +class ParallelAsyncOutputZarr(ZarrOutputMixin, ParallelAsync): + pass diff --git a/podpac/core/managers/test/test_aws.py b/podpac/core/managers/test/test_aws.py index af9dc2765..b3379bd83 100644 --- a/podpac/core/managers/test/test_aws.py +++ b/podpac/core/managers/test/test_aws.py @@ -3,8 +3,4 @@ class TestAWS(object): - def 
test_old_module_deprecation(self): - with pytest.warns(DeprecationWarning): - import podpac.core.managers.aws_lambda - - assert podpac.core.managers.aws_lambda.Lambda + pass diff --git a/podpac/core/managers/test/test_multiprocess.py b/podpac/core/managers/test/test_multiprocess.py new file mode 100644 index 000000000..55ef6fa4c --- /dev/null +++ b/podpac/core/managers/test/test_multiprocess.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +from multiprocessing import Queue + +from podpac.core.coordinates import Coordinates +from podpac.core.algorithm.utility import Arange +from podpac.core.managers.multi_process import Process, _f + + +class TestProcess(object): + def test_mp_results_the_same(self): + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node = Arange() + o_sp = node.eval(coords) + + node_mp = Process(source=node) + o_mp = node_mp.eval(coords) + + np.testing.assert_array_equal(o_sp.data, o_mp.data) + + def test_mp_results_outputs(self): + node = Arange(outputs=["a", "b"]) + node_mp = Process(source=node) + assert node.outputs == node_mp.outputs + + def test_mp_results_the_same_set_output(self): + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node = Arange() + o_sp = node.eval(coords) + output = o_sp.copy() + output[:] = np.nan + + node_mp = Process(source=node) + o_mp = node_mp.eval(coords, output) + + np.testing.assert_array_equal(o_sp, output) + + def test_f(self): + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node = Arange() + q = Queue() + _f(node.json, coords.json, q, {}) + o = q.get() + np.testing.assert_array_equal(o, node.eval(coords)) + + def test_f_fmt(self): + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node = Arange() + q = Queue() + _f(node.json, coords.json, q, {"format": "dict", "format_kwargs": {}}) + o = q.get() + np.testing.assert_array_equal(o["data"], node.eval(coords).to_dict()["data"]) diff --git a/podpac/core/managers/test/test_multithreading.py b/podpac/core/managers/test/test_multithreading.py index 1b73b7325..d7a8c4247 100644 --- a/podpac/core/managers/test/test_multithreading.py +++ b/podpac/core/managers/test/test_multithreading.py @@ -1,11 +1,13 @@ -import pytest import os - +import sys import time - -from podpac.core.managers.multi_threading import FakeLock from threading import Thread +import pytest + +from podpac import settings +from podpac.core.managers.multi_threading import FakeLock, thread_manager + class TestFakeLock(object): def test_enter_exist_single_thread(self): @@ -27,11 +29,64 @@ def f(s): print("Unlocked", s) assert lock._locked == False - t1 = Thread(target=lambda: f("thread"), daemon=True) - t2 = Thread(target=lambda: f("thread"), daemon=True) + if sys.version_info.major == 2: + t1 = Thread(target=lambda: f("thread")) + t2 = Thread(target=lambda: f("thread")) + t1.daemon = True + t2.daemon = True + else: + t1 = Thread(target=lambda: f("thread"), daemon=True) + t2 = Thread(target=lambda: f("thread"), daemon=True) print("In Main Thread") f("main1") print("Starting Thread") t1.run() t2.run() f("main2") + + +class TestThreadManager(object): + def test_request_release_threads_single_threaded(self): + with settings: + settings["N_THREADS"] = 5 + # Requests + n = thread_manager.request_n_threads(3) + assert n == 3 + n = thread_manager.request_n_threads(3) + assert n == 2 + n = thread_manager.request_n_threads(3) + assert n == 0 + + # releases + assert thread_manager._n_threads_used == 5 + n = thread_manager.release_n_threads(3) + assert n == 3 + n = thread_manager.release_n_threads(2) + assert n == 5 + n 
= thread_manager.release_n_threads(50) + assert n == 5 + + def test_request_release_threads_multi_threaded(self): + def f(s): + print("In", s) + n1 = thread_manager.release_n_threads(s) + time.sleep(0.05) + n2 = thread_manager.release_n_threads(s) + print("Released", s) + assert n2 >= n1 + + with settings: + settings["N_THREADS"] = 7 + + if sys.version_info.major == 2: + t1 = Thread(target=lambda: f(5)) + t2 = Thread(target=lambda: f(6)) + t1.daemon = True + t2.daemon = True + else: + t1 = Thread(target=lambda: f(5), daemon=True) + t2 = Thread(target=lambda: f(6), daemon=True) + f(1) + t1.run() + t2.run() + f(7) diff --git a/podpac/core/managers/test/test_parallel.py b/podpac/core/managers/test/test_parallel.py new file mode 100644 index 000000000..b8debe84c --- /dev/null +++ b/podpac/core/managers/test/test_parallel.py @@ -0,0 +1,124 @@ +import os +import shutil +import sys +import time +import numpy as np +from threading import Thread +import tempfile +import logging + +import pytest + +from podpac import settings +from podpac.core.coordinates import Coordinates +from podpac.core.algorithm.utility import CoordData +from podpac.core.managers.parallel import Parallel, ParallelOutputZarr, ParallelAsync, ParallelAsyncOutputZarr +from podpac.core.managers.multi_process import Process + +logger = logging.getLogger("podpac") +logger.setLevel(logging.DEBUG) + + +class TestParallel(object): + def test_parallel_multi_thread_compute_fill_output(self): + node = CoordData(coord_name="time") + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = Parallel(source=node, number_of_workers=2, chunks={"time": 2}) + o = node.eval(coords) + o_p = node_p.eval(coords) + + np.testing.assert_array_equal(o, o_p) + + def test_parallel_multi_thread_compute_fill_output2(self): + node = CoordData(coord_name="time") + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = Parallel(source=node, number_of_workers=2, chunks={"time": 2}) + o = node.eval(coords) + o_p = o.copy() + o_p[:] = np.nan + node_p.eval(coords, o_p) + + np.testing.assert_array_equal(o, o_p) + + @pytest.mark.skipif(sys.version < "3.7", reason="python < 3.7 cannot handle processes launched from threads") + def test_parallel_process(self): + node = Process(source=CoordData(coord_name="time")) + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = Parallel(source=node, number_of_workers=2, chunks={"time": 2}) + o = node.eval(coords) + o_p = o.copy() + o_p[:] = np.nan + node_p.eval(coords, o_p) + time.sleep(0.1) + + np.testing.assert_array_equal(o, o_p) + + +class TestParallelAsync(object): + @pytest.mark.skipif(sys.version < "3.7", reason="python < 3.7 cannot handle processes launched from threads") + def test_parallel_process_async(self): + node = Process(source=CoordData(coord_name="time")) # , block=False) + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = ParallelAsync(source=node, number_of_workers=2, chunks={"time": 2}, fill_output=False) + node_p.eval(coords) + time.sleep(0.1) + # Just try to make it run... 
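
# ---------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): the tests above
# exercise the new Process and Parallel managers introduced in this diff. A
# minimal end-to-end usage, assuming only the public classes added here, might
# look like the following; the node, coordinates, worker count, and chunk size
# are examples only. ParallelAsync is the analogous wrapper intended for
# quickly-returning nodes such as aws.Lambda with download_result=False.

import numpy as np

from podpac.core.coordinates import Coordinates
from podpac.core.algorithm.utility import CoordData
from podpac.core.managers.multi_process import Process
from podpac.core.managers.parallel import Parallel

node = CoordData(coord_name="time")                 # any podpac Node
coords = Coordinates([[1, 2, 3, 4, 5]], ["time"])

# evaluate the wrapped node in a separate OS process (blocking call)
o_process = Process(source=node).eval(coords)

# evaluate in chunks of 2 along 'time', using up to 2 worker threads, then
# reassemble the chunks into a single output array (fill_output defaults to True)
o_parallel = Parallel(source=node, number_of_workers=2, chunks={"time": 2}).eval(coords)

np.testing.assert_array_equal(o_process, o_parallel)
# ---------------------------------------------------------------------------
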
+ + +class TestParallelOutputZarr(object): + @pytest.mark.skipif(sys.version < "3.7", reason="python < 3.7 cannot handle processes launched from threads") + def test_parallel_process_zarr(self): + # Can't use tempfile.TemporaryDirectory because multiple processess need access to dir + tmpdir = os.path.join(tempfile.gettempdir(), "test_parallel_process_zarr.zarr") + + node = Process(source=CoordData(coord_name="time")) # , block=False) + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = ParallelOutputZarr( + source=node, number_of_workers=2, chunks={"time": 2}, fill_output=False, zarr_file=tmpdir + ) + o_zarr = node_p.eval(coords) + time.sleep(0.1) + # print(o_zarr.info) + np.testing.assert_array_equal([1, 2, 3, 4, 5], o_zarr["data"][:]) + + shutil.rmtree(tmpdir) + + @pytest.mark.skipif(sys.version < "3.7", reason="python < 3.7 cannot handle processes launched from threads") + def test_parallel_process_zarr_async(self): + # Can't use tempfile.TemporaryDirectory because multiple processess need access to dir + tmpdir = os.path.join(tempfile.gettempdir(), "test_parallel_process_zarr_async.zarr") + + node = Process(source=CoordData(coord_name="time")) # , block=False) + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = ParallelAsyncOutputZarr( + source=node, number_of_workers=5, chunks={"time": 2}, fill_output=False, zarr_file=tmpdir + ) + o_zarr = node_p.eval(coords) + # print(o_zarr.info) + time.sleep(0.01) + np.testing.assert_array_equal([1, 2, 3, 4, 5], o_zarr["data"][:]) + + shutil.rmtree(tmpdir) + + @pytest.mark.skipif(sys.version < "3.7", reason="python < 3.7 cannot handle processes launched from threads") + def test_parallel_process_zarr_async_starti(self): + # Can't use tempfile.TemporaryDirectory because multiple processess need access to dir + tmpdir = os.path.join(tempfile.gettempdir(), "test_parallel_process_zarr_async_starti.zarr") + + node = Process(source=CoordData(coord_name="time")) # , block=False) + coords = Coordinates([[1, 2, 3, 4, 5]], ["time"]) + node_p = ParallelAsyncOutputZarr( + source=node, number_of_workers=5, chunks={"time": 2}, fill_output=False, zarr_file=tmpdir, start_i=1 + ) + o_zarr = node_p.eval(coords) + # print(o_zarr.info) + time.sleep(0.01) + np.testing.assert_array_equal([np.nan, np.nan, 3, 4, 5], o_zarr["data"][:]) + + node_p = ParallelAsyncOutputZarr( + source=node, number_of_workers=5, chunks={"time": 2}, fill_output=False, zarr_file=tmpdir, start_i=0 + ) + o_zarr = node_p.eval(coords) + np.testing.assert_array_equal([1, 2, 3, 4, 5], o_zarr["data"][:]) + + shutil.rmtree(tmpdir) diff --git a/podpac/core/node.py b/podpac/core/node.py index 28ab48818..5e6ee8904 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -9,21 +9,26 @@ import json import inspect import importlib +import warnings from collections import OrderedDict from copy import deepcopy from hashlib import md5 as hash_alg import numpy as np import traitlets as tl +import six +import podpac from podpac.core.settings import settings from podpac.core.units import ureg, UnitsDataArray from podpac.core.utils import common_doc -from podpac.core.utils import JSONEncoder, is_json_serializable +from podpac.core.utils import JSONEncoder +from podpac.core.utils import cached_property +from podpac.core.utils import trait_is_defined from podpac.core.utils import _get_query_params_from_url, _get_from_url, _get_param from podpac.core.coordinates import Coordinates from podpac.core.style import Style -from podpac.core.cache import CacheCtrl, get_default_cache_ctrl, 
S3CacheStore, make_cache_ctrl +from podpac.core.cache import CacheCtrl, get_default_cache_ctrl, make_cache_ctrl, S3CacheStore, DiskCacheStore from podpac.core.managers.multi_threading import thread_manager @@ -37,7 +42,7 @@ Unit-aware xarray DataArray containing the results of the node evaluation. """, "hash_return": "A unique hash capturing the coordinates and parameters used to evaluate the node. ", - "outdir": "Optional output directory. Uses :attr:`podpac.settings['DISK_CACHE_DIR']` by default", + "outdir": "Optional output directory. Uses :attr:`podpac.settings.cache_path` by default", "definition_return": """ OrderedDict Dictionary containing the location of the Node, the name of the plugin (if required), as well as any @@ -70,7 +75,13 @@ class NodeException(Exception): - """ Summary """ + """ Base class for exceptions when using podpac nodes """ + + pass + + +class NodeDefinitionError(NodeException): + """ Raised for node definition errors, such as when the definition is circular or is not yet available. """ pass @@ -82,9 +93,13 @@ class Node(tl.HasTraits): Attributes ---------- cache_output: bool - Should the node's output be cached? If not provided or None, uses default based on settings. - cache_update: bool - Default is False. Should the node's cached output be updated from the source data? + Should the node's output be cached? If not provided or None, uses default based on settings + (CACHE_NODE_OUTPUT_DEFAULT for general Nodes, and CACHE_DATASOURCE_OUTPUT_DEFAULT for DataSource nodes). + If True, outputs will be cached and retrieved from cache. If False, outputs will not be cached OR retrieved from cache (even if + they exist in cache). + force_eval: bool + Default is False. Should the node's cached output be updated from the source data? If True, it ignores the cache + when computing outputs but puts results into the cache (thereby updating the cache). cache_ctrl: :class:`podpac.core.cache.cache.CacheCtrl` Class that controls caching. If not provided, uses default based on settings. dtype : type @@ -111,11 +126,16 @@ class Node(tl.HasTraits): outputs = tl.List(tl.Unicode, allow_none=True).tag(attr=True) output = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) units = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) + style = tl.Instance(Style) + dtype = tl.Any(default_value=float) cache_output = tl.Bool() - cache_update = tl.Bool(False) + force_eval = tl.Bool(False) cache_ctrl = tl.Instance(CacheCtrl, allow_none=True) - style = tl.Instance(Style) + + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + # e.g. 
data sources use ['source'] + _repr_keys = [] @tl.default("outputs") def _default_outputs(self): @@ -130,22 +150,8 @@ def _validate_output(self, d): raise ValueError("Invalid output '%s' (available outputs are %s)" % (self.output, self.outputs)) return d["value"] - @tl.default("cache_output") - def _cache_output_default(self): - return settings["CACHE_OUTPUT_DEFAULT"] - - @tl.default("cache_ctrl") - def _cache_ctrl_default(self): - return get_default_cache_ctrl() - - @tl.validate("cache_ctrl") - def _validate_cache_ctrl(self, d): - if d["value"] is None: - d["value"] = CacheCtrl([]) # no cache_stores - return d["value"] - @tl.default("style") - def _style_default(self): + def _default_style(self): return Style() @tl.validate("units") @@ -153,6 +159,14 @@ def _validate_units(self, d): ureg.Unit(d["value"]) # will throw an exception if this is not a valid pint Unit return d["value"] + @tl.default("cache_output") + def _cache_output_default(self): + return settings["CACHE_NODE_OUTPUT_DEFAULT"] + + @tl.default("cache_ctrl") + def _cache_ctrl_default(self): + return get_default_cache_ctrl() + # debugging _requested_coordinates = tl.Instance(Coordinates, allow_none=True) _output = tl.Instance(UnitsDataArray, allow_none=True) @@ -160,6 +174,10 @@ def _validate_units(self, d): # Flag that is True if the Node was run multi-threaded, or None if the question doesn't apply _multi_threaded = tl.Bool(allow_none=True, default_value=None) + # util + _definition_guard = False + _traits_initialized_guard = False + def __init__(self, **kwargs): """ Do not overwrite me """ @@ -174,13 +192,18 @@ def __init__(self, **kwargs): # on subsequent initializations, they will already be read_only. with self.hold_trait_notifications(): for name, trait in self.traits().items(): - if trait.metadata.get("readonly") or trait.metadata.get("attr"): + if settings["DEBUG"]: + trait.read_only = False + elif trait.metadata.get("readonly") or trait.metadata.get("attr"): if name in tkwargs: self.set_trait(name, tkwargs.pop(name)) trait.read_only = True - # Call traitlest constructor - super(Node, self).__init__(**tkwargs) + # Call traitlets constructor + super(Node, self).__init__(**tkwargs) + + self._traits_initialized_guard = True + self.init() def _first_init(self, **kwargs): @@ -203,6 +226,28 @@ def init(self): """ pass + @property + def attrs(self): + """List of node attributes""" + return [name for name in self.traits() if self.trait_metadata(name, "attr")] + + @property + def _repr_info(self): + keys = self._repr_keys[:] + if self.trait_is_defined("output") and self.output is not None: + if "output" not in keys: + keys.append("output") + elif self.trait_is_defined("outputs") and self.outputs is not None: + if "outputs" not in keys: + keys.append("outputs") + return ", ".join("%s=%s" % (key, repr(getattr(self, key))) for key in keys) + + def __repr__(self): + return "<%s(%s)>" % (self.__class__.__name__, self._repr_info) + + def __str__(self): + return "<%s(%s) attrs: %s>" % (self.__class__.__name__, self._repr_info, ", ".join(self.attrs)) + @common_doc(COMMON_DOC) def eval(self, coordinates, output=None): """ @@ -241,7 +286,7 @@ def eval_group(self, group): def find_coordinates(self): """ - Get all available native coordinates for the Node. Implemented in child classes. + Get all available coordinates for the Node. Implemented in child classes. 
Returns ------- @@ -282,6 +327,9 @@ def create_output_array(self, coords, data=np.nan, **kwargs): return UnitsDataArray.create(coords, data=data, outputs=self.outputs, dtype=self.dtype, attrs=attrs, **kwargs) + def trait_is_defined(self, name): + return trait_is_defined(self, name) + # ----------------------------------------------------------------------------------------------------------------- # Serialization # ----------------------------------------------------------------------------------------------------------------- @@ -299,21 +347,10 @@ def base_ref(self): return self.__class__.__name__ @property - def base_definition(self): - """ - Base node definition. - - This property is implemented in the primary base nodes (DataSource, Algorithm, and Compositor). Node - subclasses with additional attrs will need to extend this property. - - Returns - ------- - {definition_return} - - """ - + def _base_definition(self): d = OrderedDict() + # node and plugin if self.__module__ == "podpac": d["node"] = self.__class__.__name__ elif self.__module__.startswith("podpac."): @@ -323,45 +360,43 @@ def base_definition(self): d["plugin"] = self.__module__ d["node"] = self.__class__.__name__ + # attrs/inputs attrs = {} - lookup_attrs = {} - - for key, value in self.traits().items(): - if not value.metadata.get("attr", False): - continue - - attr = getattr(self, key) + inputs = {} + for name in self.attrs: + value = getattr(self, name) + + if ( + isinstance(value, Node) + or (isinstance(value, (list, tuple, np.ndarray)) and all(isinstance(elem, Node) for elem in value)) + or (isinstance(value, dict) and all(isinstance(elem, Node) for elem in value.values())) + ): + inputs[name] = value + else: + attrs[name] = value - if key == "units" and attr is None: - continue + if "units" in attrs and attrs["units"] is None: + del attrs["units"] - # check serializable - if not is_json_serializable(attr, cls=JSONEncoder): - raise NodeException("Cannot serialize attr '%s' with type '%s'" % (key, type(attr))) + if "outputs" in attrs and attrs["outputs"] is None: + del attrs["outputs"] - if isinstance(attr, Node): - lookup_attrs[key] = attr - else: - attrs[key] = attr + if "output" in attrs and attrs["output"] is None: + del attrs["output"] if attrs: - # remove unnecessary attrs - if self.outputs is None and "outputs" in attrs: - del attrs["outputs"] - if self.output is None and "output" in attrs: - del attrs["output"] + d["attrs"] = attrs - d["attrs"] = OrderedDict([(key, attrs[key]) for key in sorted(attrs.keys())]) - - if lookup_attrs: - d["lookup_attrs"] = OrderedDict([(key, lookup_attrs[key]) for key in sorted(lookup_attrs.keys())]) + if inputs: + d["inputs"] = inputs + # style if self.style.definition: d["style"] = self.style.definition return d - @property + @cached_property def definition(self): """ Full node definition. @@ -372,84 +407,104 @@ def definition(self): Dictionary-formatted node definition. 
""" - nodes = [] - refs = [] - definitions = [] - - def add_node(node): - for ref, n in zip(refs, nodes): - if node.hash == n.hash: - return ref - - # get base definition and then replace nodes with references, adding nodes depth first - d = node.base_definition - if "lookup_source" in d: - d["lookup_source"] = add_node(d["lookup_source"]) - if "lookup_attrs" in d: - for key, attr_node in d["lookup_attrs"].items(): - d["lookup_attrs"][key] = add_node(attr_node) - if "inputs" in d: - for key, input_node in d["inputs"].items(): - if input_node is not None: - d["inputs"][key] = add_node(input_node) - if "sources" in d: - sources = [] # we need this list so that we don't overwrite the actual sources array - for i, source_node in enumerate(d["sources"]): - sources.append(add_node(source_node)) - d["sources"] = sources - - # get base ref and then ensure it is unique - ref = node.base_ref - while ref in refs: - if re.search("_[1-9][0-9]*$", ref): - ref, i = ref.rsplit("_", 1) - i = int(i) - else: - i = 0 - ref = "%s_%d" % (ref, i + 1) - - nodes.append(node) - refs.append(ref) - definitions.append(d) - - return ref - - add_node(self) - - return OrderedDict(zip(refs, definitions)) + if getattr(self, "_definition_guard", False): + raise NodeDefinitionError("node definition has a circular dependency") - @property - def pipeline(self): - """Deprecated. See Node.definition and Node.from_definition.""" - from podpac.core.pipeline import Pipeline + if not getattr(self, "_traits_initialized_guard", False): + raise NodeDefinitionError("node is not yet fully initialized") - return Pipeline(definition=OrderedDict({"nodes": self.definition})) + try: + self._definition_guard = True - @property + nodes = [] + refs = [] + definitions = [] + + def add_node(node): + for ref, n in zip(refs, nodes): + if node == n: + return ref + + # get base definition + d = node._base_definition + + if "inputs" in d: + # sort and shallow copy + d["inputs"] = OrderedDict([(key, d["inputs"][key]) for key in sorted(d["inputs"].keys())]) + + # replace nodes with references, adding nodes depth first + for key, value in d["inputs"].items(): + if isinstance(value, Node): + d["inputs"][key] = add_node(value) + elif isinstance(value, (list, tuple, np.ndarray)): + d["inputs"][key] = [add_node(item) for item in value] + elif isinstance(value, dict): + d["inputs"][key] = {k: add_node(v) for k, v in value.items()} + else: + raise TypeError("Invalid input '%s' of type '%s': %s" % (key, type(value))) + + if "attrs" in d: + # sort and shallow copy + d["attrs"] = OrderedDict([(key, d["attrs"][key]) for key in sorted(d["attrs"].keys())]) + + # get base ref and then ensure it is unique + ref = node.base_ref + while ref in refs: + if re.search("_[1-9][0-9]*$", ref): + ref, i = ref.rsplit("_", 1) + i = int(i) + else: + i = 0 + ref = "%s_%d" % (ref, i + 1) + + nodes.append(node) + refs.append(ref) + definitions.append(d) + + return ref + + # add top level node + add_node(self) + + # finalize, verify serializable, and return + definition = OrderedDict(zip(refs, definitions)) + definition["podpac_version"] = podpac.__version__ + json.dumps(definition, cls=JSONEncoder) + return definition + + finally: + self._definition_guard = False + + @cached_property def json(self): - """definition for this node in json format + """Definition for this node in JSON format.""" - Returns - ------- - str - JSON-formatted node definition. 
- """ return json.dumps(self.definition, separators=(",", ":"), cls=JSONEncoder) - @property + @cached_property def json_pretty(self): + """Definition for this node in JSON format, with indentation suitable for display.""" + return json.dumps(self.definition, indent=4, cls=JSONEncoder) - @property + @cached_property def hash(self): - # Style should not be part of the hash - defn = self.json + """ hash for this node, used in caching and to determine equality. """ + + # deepcopy so that the cached definition property is not modified by the deletes below + d = deepcopy(self.definition) + + # omit version + if "podpac_version" in d: + del d["podpac_version"] - # Note: this ONLY works because the Style node has NO dictionaries as part - # of its attributes - hashstr = re.sub(r'"style":\{.*?\},?', "", defn) + # omit style in every node + for k in d: + if "style" in d[k]: + del d[k]["style"] - return hash_alg(hashstr.encode("utf-8")).hexdigest() + s = json.dumps(d, separators=(",", ":"), cls=JSONEncoder) + return hash_alg(s.encode("utf-8")).hexdigest() def save(self, path): """ @@ -468,6 +523,16 @@ def save(self, path): with open(path, "w") as f: json.dump(self.definition, f, separators=(",", ":"), cls=JSONEncoder) + def __eq__(self, other): + if not isinstance(other, Node): + return False + return self.hash == other.hash + + def __ne__(self, other): + if not isinstance(other, Node): + return True + return self.hash != other.hash + # ----------------------------------------------------------------------------------------------------------------- # Caching Interface # ----------------------------------------------------------------------------------------------------------------- @@ -494,12 +559,17 @@ def get_cache(self, key, coordinates=None): Cached data not found. """ - if not self.has_cache(key, coordinates=coordinates): + try: + self.definition + except NodeDefinitionError as e: + raise NodeException("Cache unavailable, %s (key='%s')" % (e.args[0], key)) + + if self.cache_ctrl is None or not self.has_cache(key, coordinates=coordinates): raise NodeException("cached data not found for key '%s' and coordinates %s" % (key, coordinates)) return self.cache_ctrl.get(self, key, coordinates=coordinates) - def put_cache(self, data, key, coordinates=None, overwrite=False): + def put_cache(self, data, key, coordinates=None, overwrite=True): """ Cache data for this node. @@ -512,13 +582,22 @@ def put_cache(self, data, key, coordinates=None, overwrite=False): coordinates : podpac.Coordinates, optional Coordinates that the cached data depends on. Omit for coordinate-independent data. overwrite : bool, optional - Overwrite existing data, default False + Overwrite existing data, default True. Raises ------ NodeException Cached data already exists (and overwrite is False) """ + + try: + self.definition + except NodeDefinitionError as e: + raise NodeException("Cache unavailable, %s (key='%s')" % (e.args[0], key)) + + if self.cache_ctrl is None: + return + if not overwrite and self.has_cache(key, coordinates=coordinates): raise NodeException("Cached data already exists for key '%s' and coordinates %s" % (key, coordinates)) @@ -541,10 +620,19 @@ def has_cache(self, key, coordinates=None): bool True if there is cached data for this node, key, and coordinates. 
""" + + try: + self.definition + except NodeDefinitionError as e: + raise NodeException("Cache unavailable, %s (key='%s')" % (e.args[0], key)) + + if self.cache_ctrl is None: + return False + with thread_manager.cache_lock: return self.cache_ctrl.has(self, key, coordinates=coordinates) - def rem_cache(self, key, coordinates=None, mode=None): + def rem_cache(self, key, coordinates=None, mode="all"): """ Clear cached data for this node. @@ -556,13 +644,22 @@ def rem_cache(self, key, coordinates=None, mode=None): Default is None. Delete cached objects for these coordinates. If `'*'`, cached data is deleted for all coordinates, including coordinate-independent data. If None, will only affect coordinate-independent data. mode: str, optional - Specify which cache stores are affected. + Specify which cache stores are affected. Default 'all'. See Also --------- `podpac.core.cache.cache.CacheCtrl.clear` to remove ALL cache for ALL nodes. """ + + try: + self.definition + except NodeDefinitionError as e: + raise NodeException("Cache unavailable, %s (key='%s')" % (e.args[0], key)) + + if self.cache_ctrl is None: + return + self.cache_ctrl.rem(self, key=key, coordinates=coordinates, mode=mode) # --------------------------------------------------------# @@ -591,9 +688,12 @@ def from_definition(cls, definition): load : create a node from file """ - from podpac.core.data.datasource import DataSource - from podpac.core.algorithm.algorithm import BaseAlgorithm - from podpac.core.compositor import Compositor + if "podpac_version" in definition and definition["podpac_version"] != podpac.__version__: + warnings.warn( + "node definition version mismatch " + "(this node was created with podpac version '%s', " + "but your current podpac version is '%s')" % (definition["podpac_version"], podpac.__version__) + ) if len(definition) == 0: raise ValueError("Invalid definition: definition cannot be empty.") @@ -601,6 +701,9 @@ def from_definition(cls, definition): # parse node definitions in order nodes = OrderedDict() for name, d in definition.items(): + if name == "podpac_version": + continue + if "node" not in d: raise ValueError("Invalid definition for node '%s': 'node' property required" % name) @@ -622,84 +725,20 @@ def from_definition(cls, definition): # parse and configure kwargs kwargs = {} - whitelist = ["node", "attrs", "lookup_attrs", "plugin", "style"] - - # DataSource, Compositor, and Algorithm specific properties - parents = inspect.getmro(node_class) - - if DataSource in parents: - if "attrs" in d: - if "source" in d["attrs"]: - raise ValueError( - "Invalid definition for node '%s': DataSource 'attrs' cannot have a 'source' property." 
- % name - ) - - if "lookup_source" in d["attrs"]: - raise ValueError( - "Invalid definition for node '%s': DataSource 'attrs' cannot have a 'lookup_source' property" - % name - ) - - if "interpolation" in d["attrs"]: - raise ValueError( - "Invalid definition for node '%s': DataSource 'attrs' cannot have an 'interpolation' property" - % name - ) - - if "source" in d: - kwargs["source"] = d["source"] - whitelist.append("source") - - elif "lookup_source" in d: - kwargs["source"] = _get_subattr(nodes, name, d["lookup_source"]) - whitelist.append("lookup_source") - - if "interpolation" in d: - kwargs["interpolation"] = d["interpolation"] - whitelist.append("interpolation") - - if Compositor in parents: - if "attrs" in d: - if "interpolation" in d["attrs"]: - raise ValueError( - "Invalid definition for node '%s': Compositor 'attrs' cannot have an 'interpolation' property" - % name - ) - - if "sources" in d: - sources = [_get_subattr(nodes, name, source) for source in d["sources"]] - kwargs["sources"] = np.array(sources) - whitelist.append("sources") - - if "interpolation" in d: - kwargs["interpolation"] = d["interpolation"] - whitelist.append("interpolation") - - if BaseAlgorithm in parents: - if "attrs" in d: - if "inputs" in d["attrs"]: - raise ValueError( - "Invalid definition for node '%s': Algorithm 'attrs' cannot have an 'inputs' property" - % name - ) - - if "inputs" in d: - inputs = {k: _get_subattr(nodes, name, v) for k, v in d["inputs"].items()} - kwargs.update(inputs) - whitelist.append("inputs") - for k, v in d.get("attrs", {}).items(): kwargs[k] = v + for k, v in d.get("inputs", {}).items(): + kwargs[k] = _lookup_input(nodes, name, v) + for k, v in d.get("lookup_attrs", {}).items(): - kwargs[k] = _get_subattr(nodes, name, v) + kwargs[k] = _lookup_attr(nodes, name, v) if "style" in d: kwargs["style"] = Style.from_definition(d["style"]) for k in d: - if k not in whitelist: + if k not in ["node", "inputs", "attrs", "lookup_attrs", "plugin", "style"]: raise ValueError("Invalid definition for node '%s': unexpected property '%s'" % (name, k)) nodes[name] = node_class(**kwargs) @@ -772,6 +811,7 @@ def from_url(cls, url): Notes ------- The request can specify the PODPAC node by four different mechanism: + * Direct node name: PODPAC will look for an appropriate node in podpac.datalib * JSON definition passed using the 'PARAMS' query string: Need to specify the special LAYER/COVERAGE value of "%PARAMS%" @@ -810,19 +850,103 @@ def from_url(cls, url): return cls.from_definition(d) -def _get_subattr(nodes, name, ref): - refs = ref.split(".") - try: - attr = nodes[refs[0]] - for _name in refs[1:]: - attr = getattr(attr, _name) - except (KeyError, AttributeError): - raise ValueError("Invalid definition for node '%s': reference to nonexistent node/attribute '%s'" % (name, ref)) +def _lookup_input(nodes, name, value): + # containers + if isinstance(value, list): + return [_lookup_input(nodes, name, elem) for elem in value] + + if isinstance(value, dict): + return {k: _lookup_input(nodes, name, v) for k, v in value.items()} + + # node reference + if not isinstance(value, six.string_types): + raise ValueError( + "Invalid definition for node '%s': invalid reference '%s' of type '%s' in inputs" + % (name, value, type(value)) + ) + + if not value in nodes: + raise ValueError( + "Invalid definition for node '%s': reference to nonexistent node '%s' in inputs" % (name, value) + ) + + node = nodes[value] + + # copy in debug mode + if settings["DEBUG"]: + node = deepcopy(node) + + return node + + +def 
_lookup_attr(nodes, name, value): +    # containers +    if isinstance(value, list): +        return [_lookup_attr(nodes, name, elem) for elem in value] + +    if isinstance(value, dict): +        return {k: _lookup_attr(nodes, name, v) for k, v in value.items()} + +    if not isinstance(value, six.string_types): +        raise ValueError( +            "Invalid definition for node '%s': invalid reference '%s' of type '%s' in lookup_attrs" +            % (name, value, type(value)) +        ) + +    # node +    elems = value.split(".") +    if elems[0] not in nodes: +        raise ValueError( +            "Invalid definition for node '%s': reference to nonexistent node '%s' in lookup_attrs" % (name, elems[0]) +        ) + +    # subattrs +    attr = nodes[elems[0]] +    for n in elems[1:]: +        if not hasattr(attr, n): +            raise ValueError( +                "Invalid definition for node '%s': reference to nonexistent attribute '%s' in lookup_attrs value '%s'" +                % (name, n, value) +            ) +        attr = getattr(attr, n) + +    # copy in debug mode     if settings["DEBUG"]:         attr = deepcopy(attr) +     return attr   +# --------------------------------------------------------# +# Mixins +# --------------------------------------------------------# + + +class NoCacheMixin(tl.HasTraits): +    """ Mixin to use no cache by default. """ + +    cache_ctrl = tl.Instance(CacheCtrl, allow_none=True) + +    @tl.default("cache_ctrl") +    def _cache_ctrl_default(self): +        return CacheCtrl([]) + + +class DiskCacheMixin(tl.HasTraits): +    """ Mixin to add disk caching to the Node by default. """ + +    cache_ctrl = tl.Instance(CacheCtrl, allow_none=True) + +    @tl.default("cache_ctrl") +    def _cache_ctrl_default(self): +        # get the default cache_ctrl and add a disk cache store if necessary +        default_ctrl = get_default_cache_ctrl() +        stores = default_ctrl._cache_stores +        if not any(isinstance(store, DiskCacheStore) for store in default_ctrl._cache_stores): +            stores.append(DiskCacheStore()) +        return CacheCtrl(stores) + + +# --------------------------------------------------------# +# Decorators +# --------------------------------------------------------# @@ -850,7 +974,7 @@ def wrapper(self, coordinates, output=None):         key = cache_key         cache_coordinates = coordinates.transpose(*sorted(coordinates.dims))  # order agnostic caching -        if not self.cache_update and self.has_cache(key, cache_coordinates): +        if not self.force_eval and self.cache_output and self.has_cache(key, cache_coordinates):             data = self.get_cache(key, cache_coordinates)             if output is not None:                 order = [dim for dim in output.dims if dim not in data.dims] + list(data.dims) @@ -858,11 +982,8 @@ def wrapper(self, coordinates, output=None):             self._from_cache = True         else:             data = fn(self, coordinates, output=output) - -            # We need to check if the cache now has the key because it is possible that -            # the previous function call added the key with the coordinates to the cache -            if self.cache_output and not (self.has_cache(key, cache_coordinates) and not self.cache_update): -                self.put_cache(data, key, cache_coordinates, overwrite=self.cache_update) +            if self.cache_output: +                self.put_cache(data, key, cache_coordinates)             self._from_cache = False          # extract single output, if necessary @@ -874,7 +995,7 @@ def wrapper(self, coordinates, output=None):             order = [dim for dim in coordinates.idims if dim in data.dims]             if "output" in data.dims:                 order.append("output")             - data = data.transpose(*order) +            data = data.transpose(*order, transpose_coords=False)          if settings["DEBUG"]:             self._output = data @@ -885,99 +1006,3 @@ def wrapper(self, coordinates, output=None):         return data      return wrapper - - -def cache_func(key, depends=None): -    """ -    Decorating for 
caching a function's output based on a key. - - Parameters - ----------- - key: str - Key used for caching. - depends: str, list, traitlets.All (optional) - Default is None. Any traits that the cached property depends on. The cached function may NOT - change the value of any of these dependencies (this will result in a RecursionError) - - - Notes - ----- - This decorator cannot handle function input parameters. - - If the function uses any tagged attributes, these will essentially operate like dependencies - because the cache key changes based on the node definition, which is affected by tagged attributes. - - Examples - ---------- - >>> from podpac import Node - >>> from podpac.core.node import cache_func - >>> import traitlets as tl - >>> class MyClass(Node): - value = tl.Int(0) - @cache_func('add') - def add_value(self): - self.value += 1 - return self.value - @cache_func('square', depends='value') - def square_value_depends(self): - return self.value - - >>> n = MyClass(cache_ctrl=None) - >>> n.add_value() # The function as defined is called - 1 - >>> n.add_value() # The function as defined is called again, since we have specified no caching - 2 - >>> n.cache_ctrl = CacheCtrl([RamCacheStore()]) - >>> n.add_value() # The function as defined is called again, and the value is stored in memory - 3 - >>> n.add_value() # The value is retrieved from disk, note the change in n.value is not captured - 3 - >>> n.square_value_depends() # The function as defined is called, and the value is stored in memory - 16 - >>> n.square_value_depends() # The value is retrieved from memory - 16 - >>> n.value += 1 - >>> n.square_value_depends() # The function as defined is called, and the value is stored in memory. Note the change in n.value is captured. - 25 - """ - # This is the actual decorator which will be evaluated and returns the wrapped function - def cache_decorator(func): - # This is the initial wrapper that sets up the observations - @functools.wraps(func) - def cache_wrapper(self): - # This is the function that updates the cached based on observed traits - def cache_updator(change): - # print("Updating value on self:", id(self)) - out = func(self) - self.put_cache(out, key, overwrite=True) - - if depends: - # This sets up the observer on the dependent traits - # print ("setting up observer on self: ", id(self)) - self.observe(cache_updator, depends) - # Since attributes could change on instantiation, anything we previously - # stored is likely out of date. So, force and update to the cache. - cache_updator(None) - - # This is the final wrapper the continues to fetch data from cache - # after the observer has been set up. 
- @functools.wraps(func) - def cached_function(): - try: - out = self.get_cache(key) - except NodeException: - out = func(self) - self.put_cache(out, key) - return out - - # Since this is the first time the function is run, set the new wrapper - # on the class instance so that the current function won't be called again - # (which would set up an additional observer) - setattr(self, func.__name__, cached_function) - - # Return the value on the first run - return cached_function() - - return cache_wrapper - - return cache_decorator diff --git a/podpac/core/pipeline/__init__.py b/podpac/core/pipeline/__init__.py deleted file mode 100644 index 7df8cb341..000000000 --- a/podpac/core/pipeline/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from podpac.core.pipeline.pipeline import Pipeline, PipelineError diff --git a/podpac/core/pipeline/output.py b/podpac/core/pipeline/output.py deleted file mode 100644 index f61b95772..000000000 --- a/podpac/core/pipeline/output.py +++ /dev/null @@ -1,212 +0,0 @@ -""" -Pipeline output Summary -""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import os -import warnings -from collections import OrderedDict -from io import BytesIO - -try: - import cPickle # Python 2.7 -except: - import _pickle as cPickle - -import traitlets as tl -import numpy as np -import traitlets as tl - -from podpac.core.node import Node - - -class Output(tl.HasTraits): - """ - Base class for Pipeline Outputs. - - Attributes - ---------- - node : Node - output node - name : string - output name - """ - - node = tl.Instance(Node) - name = tl.Unicode() - - def write(self, output, coordinates): - """Write the node output - - Arguments - --------- - output : UnitsDataArray - Node evaluation output to write - coordinates : Coordinates - Evaluated coordinates. - - Raises - ------ - NotImplementedError - Description - """ - raise NotImplementedError - - @property - def definition(self): - d = OrderedDict() - for key, value in self.traits().items(): - if value.metadata.get("attr", False): - d[key] = getattr(self, key) - return d - - -class NoOutput(Output): - """ No Output """ - - def __init__(self, node, name): - super(NoOutput, self).__init__(node=node, name=name) - - def write(self, output, coordinates): - pass - - -class FileOutput(Output): - """ Output a file to the local filesystem. - - Attributes - ---------- - format : TYPE - Description - outdir : TYPE - Description - """ - - outdir = tl.Unicode() - format = tl.CaselessStrEnum(values=["pickle", "geotif", "png", "nc", "json"], default_value="pickle").tag(attr=True) - mode = tl.Unicode(default_value="file").tag(attr=True) - - _path = tl.Unicode(allow_none=True, default_value=None) - - def __init__(self, node, name, format=None, outdir=None, mode=None): - kwargs = {} - if format is not None: - kwargs["format"] = format - if outdir is not None: - kwargs["outdir"] = outdir - if mode is not None: - kwargs["mode"] = mode - super(FileOutput, self).__init__(node=node, name=name, **kwargs) - - @property - def path(self): - return self._path - - # TODO: docstring? 
- def write(self, output, coordinates): - filename = "%s_%s_%s" % (self.name, self.node.hash, coordinates.hash) - path = os.path.join(self.outdir, filename) - - if self.format == "pickle": - path = "%s.pkl" % path - with open(path, "wb") as f: - cPickle.dump(output, f) - elif self.format == "png": - raise NotImplementedError("format '%s' not yet implemented" % self.format) - elif self.format == "geotif": - raise NotImplementedError("format '%s' not yet implemented" % self.format) - elif self.format == "nc": - raise NotImplementedError("format '%s' not yet implemented" % self.format) - elif self.format == "json": - raise NotImplementedError("format '$s' not yet implemented" % self.format) - - self._path = path - - -class FTPOutput(Output): - """Output a file and send over FTP. - - Attributes - ---------- - url : TYPE - Description - user : TYPE - Description - """ - - url = tl.Unicode() - user = tl.Unicode() - pw = tl.Unicode() - - def __init__(self, node, name, url=None, user=None, pw=None): - kwargs = {} - if url is not None: - kwargs["url"] = url - if user is not None: - kwargs["user"] = user - if pw is not None: - kwargs["pw"] = pw - super(FTPOutput, self).__init__(node=node, name=name, **kwargs) - - -class S3Output(Output): - """Output a file and send to S3 - - Attributes - ---------- - bucket : TYPE - Description - user : TYPE - Description - """ - - bucket = tl.Unicode() - user = tl.Unicode() - - def __init__(self, node, name, bucket=None, user=None): - kwargs = {} - if bucket is not None: - kwargs["bucket"] = bucket - if user is not None: - kwargs["user"] = user - super(S3Output, self).__init__(node=node, name=name, **kwargs) - - -class ImageOutput(Output): - """Output an image in RAM - - Attributes - ---------- - format : TYPE - Description - image : TYPE - Description - vmax : TYPE - Description - vmin : TYPE - Description - """ - - format = tl.CaselessStrEnum(values=["png"], default_value="png").tag(attr=True) - mode = tl.Unicode(default_value="image").tag(attr=True) - vmin = tl.CFloat(allow_none=True, default_value=np.nan).tag(attr=True) - vmax = tl.CFloat(allow_none=True, default_value=np.nan).tag(attr=True) - image = tl.Bytes(allow_none=True, default_value=None) - - def __init__(self, node, name, format=None, mode=None, vmin=None, vmax=None): - kwargs = {} - if format is not None: - kwargs["format"] = format - if mode is not None: - kwargs["mode"] = mode - if vmin is not None: - kwargs["vmin"] = vmin - if vmax is not None: - kwargs["vmax"] = vmax - - super(ImageOutput, self).__init__(node=node, name=name, **kwargs) - - # TODO: docstring? 
- def write(self, output, coordinates): - self.image = output.to_image(format=self.format, vmin=self.vmin, vmax=self.vmax, return_base64=True) diff --git a/podpac/core/pipeline/pipeline.py b/podpac/core/pipeline/pipeline.py deleted file mode 100644 index a44527e63..000000000 --- a/podpac/core/pipeline/pipeline.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -Pipeline Summary -""" - -from __future__ import division, unicode_literals, print_function, absolute_import - -import inspect -import warnings -import importlib -from collections import OrderedDict -import json -from copy import deepcopy - -import traitlets as tl -import numpy as np - -from podpac.core.settings import settings -from podpac.core.utils import OrderedDictTrait, JSONEncoder -from podpac.core.node import Node, NodeException -from podpac.core.style import Style -from podpac.core.data.datasource import DataSource -from podpac.core.algorithm.algorithm import BaseAlgorithm -from podpac.core.compositor import Compositor - -from podpac.core.pipeline.output import Output, NoOutput, FileOutput, S3Output, FTPOutput, ImageOutput - - -class PipelineError(NodeException): - """ - Raised when parsing a Pipeline definition fails. - """ - - pass - - -class Pipeline(Node): - """Deprecated. See Node.definition and Node.from_definition.""" - - definition = OrderedDictTrait(help="pipeline definition") - json = tl.Unicode(help="JSON definition") - pipeline_output = tl.Instance(Output, help="pipeline output") - do_write_output = tl.Bool(True) - - def _first_init(self, path=None, **kwargs): - warnings.warn( - "Pipelines are deprecated and will be removed in podpac 2.0. See Node.definition and " - "Node.from_definition for Node serialization.", - DeprecationWarning, - ) - - if (path is not None) + ("definition" in kwargs) + ("json" in kwargs) != 1: - raise TypeError("Pipeline requires exactly one 'path', 'json', or 'definition' argument") - - if path is not None: - with open(path) as f: - kwargs["definition"] = json.load(f, object_pairs_hook=OrderedDict) - - return super(Pipeline, self)._first_init(**kwargs) - - @tl.validate("json") - def _json_validate(self, proposal): - s = proposal["value"] - definition = json.loads(s, object_pairs_hook=OrderedDict) - parse_pipeline_definition(definition) - return json.dumps( - json.loads(s, object_pairs_hook=OrderedDict), separators=(",", ":"), cls=JSONEncoder - ) # standardize - - @tl.validate("definition") - def _validate_definition(self, proposal): - definition = proposal["value"] - parse_pipeline_definition(definition) - return definition - - @tl.default("json") - def _json_from_definition(self): - return json.dumps(self.definition, separators=(",", ":"), cls=JSONEncoder) - - @tl.default("definition") - def _definition_from_json(self): - return json.loads(self.json, object_pairs_hook=OrderedDict) - - @tl.default("pipeline_output") - def _parse_definition(self): - return parse_pipeline_definition(self.definition) - - def eval(self, coordinates, output=None): - """Evaluate the pipeline, writing the output if one is defined. 
- - Parameters - ---------- - coordinates : TYPE - Description - """ - - self._requested_coordinates = coordinates - - output = self.pipeline_output.node.eval(coordinates, output) - if self.do_write_output: - self.pipeline_output.write(output, coordinates) - - self._output = output - return output - - # ----------------------------------------------------------------------------------------------------------------- - # properties, forwards output node - # ----------------------------------------------------------------------------------------------------------------- - - @property - def node(self): - return self.pipeline_output.node - - @property - def units(self): - return self.node.units - - @property - def dtype(self): - return self.node.dtype - - @property - def cache_ctrl(self): - return self.node.cache_ctrl - - @property - def style(self): - return self.node.style - - -# --------------------------------------------------------------------------------------------------------------------- -# Helper functions -# --------------------------------------------------------------------------------------------------------------------- - - -def parse_pipeline_definition(definition): - if "nodes" not in definition: - raise PipelineError("Pipeline definition requires 'nodes' property") - - if len(definition["nodes"]) == 0: - raise PipelineError("'nodes' property cannot be empty") - - # parse node definitions - nodes = OrderedDict() - for key, d in definition["nodes"].items(): - nodes[key] = _parse_node_definition(nodes, key, d) - - # parse output definition - output = _parse_output_definition(nodes, definition.get("pipeline_output", {})) - - return output - - -def _parse_node_definition(nodes, name, d): - # get node class - module_root = d.get("plugin", "podpac") - node_string = "%s.%s" % (module_root, d["node"]) - module_name, node_name = node_string.rsplit(".", 1) - try: - module = importlib.import_module(module_name) - except ImportError: - raise PipelineError("No module found '%s'" % module_name) - try: - node_class = getattr(module, node_name) - except AttributeError: - raise PipelineError("Node '%s' not found in module '%s'" % (node_name, module_name)) - - # parse and configure kwargs - kwargs = {} - whitelist = ["node", "attrs", "lookup_attrs", "plugin", "style"] - - # DataSource, Compositor, and Algorithm specific properties - parents = inspect.getmro(node_class) - - if DataSource in parents: - if "attrs" in d: - if "source" in d["attrs"]: - raise PipelineError("The 'source' property cannot be in attrs") - - if "lookup_source" in d["attrs"]: - raise PipelineError("The 'lookup_source' property cannot be in attrs") - - if "source" in d: - kwargs["source"] = d["source"] - whitelist.append("source") - - elif "lookup_source" in d: - kwargs["source"] = _get_subattr(nodes, name, d["lookup_source"]) - whitelist.append("lookup_source") - - if Compositor in parents: - if "sources" in d: - sources = [_get_subattr(nodes, name, source) for source in d["sources"]] - kwargs["sources"] = np.array(sources) - whitelist.append("sources") - - if DataSource in parents or Compositor in parents: - if "attrs" in d and "interpolation" in d["attrs"]: - raise PipelineError("The 'interpolation' property cannot be in attrs") - - if "interpolation" in d: - kwargs["interpolation"] = d["interpolation"] - whitelist.append("interpolation") - - if BaseAlgorithm in parents: - if "inputs" in d: - inputs = {k: _get_subattr(nodes, name, v) for k, v in d["inputs"].items()} - kwargs.update(inputs) - whitelist.append("inputs") - 
- for k, v in d.get("attrs", {}).items(): - kwargs[k] = v - - for k, v in d.get("lookup_attrs", {}).items(): - kwargs[k] = _get_subattr(nodes, name, v) - - if "style" in d: - kwargs["style"] = Style.from_definition(d["style"]) - - for key in d: - if key not in whitelist: - raise PipelineError("node '%s' has unexpected property %s" % (name, key)) - - # return node info - return node_class(**kwargs) - - -def _parse_output_definition(nodes, d): - # node (uses last node by default) - if "node" in d: - name = d["node"] - else: - name = list(nodes.keys())[-1] - - node = _get_subattr(nodes, "output", name) - - # output parameters - config = {k: v for k, v in d.items() if k not in ["node", "mode", "plugin", "output"]} - - # get output class from mode - if "plugin" not in d: - # core output (from mode) - mode = d.get("mode", "none") - if mode == "none": - output_class = NoOutput - elif mode == "file": - output_class = FileOutput - elif mode == "ftp": - output_class = FTPOutput - elif mode == "s3": - output_class = S3Output - elif mode == "image": - output_class = ImageOutput - else: - raise PipelineError("output has unexpected mode '%s'" % mode) - - output = output_class(node, name, **config) - - else: - # custom output (from plugin) - custom_output = "%s.%s" % (d["plugin"], d["output"]) - module_name, class_name = custom_output.rsplit(".", 1) - try: - module = importlib.import_module(module_name) - except ImportError: - raise PipelineError("No module found '%s'" % module_name) - try: - output_class = getattr(module, class_name) - except AttributeError: - raise PipelineError("Output '%s' not found in module '%s'" % (class_name, module_name)) - - try: - output = output_class(node, name, **config) - except Exception as e: - raise PipelineError("Could not create custom output '%s': %s" % (custom_output, e)) - - if not isinstance(output, Output): - raise PipelineError("Custom output '%s' must subclass 'podpac.core.pipeline.output.Output'" % custom_output) - - return output - - -def _get_subattr(nodes, name, ref): - refs = ref.split(".") - - try: - attr = nodes[refs[0]] - for _name in refs[1:]: - attr = getattr(attr, _name) - except (KeyError, AttributeError): - raise PipelineError("'%s' references nonexistent node/attribute '%s'" % (name, ref)) - - if settings["DEBUG"]: - attr = deepcopy(attr) - - return attr diff --git a/podpac/core/pipeline/test/test.json b/podpac/core/pipeline/test/test.json deleted file mode 100644 index 4202055f3..000000000 --- a/podpac/core/pipeline/test/test.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "nodes": { - "a": { - "node": "algorithm.Arange" - } - }, - "pipeline_output": { - "node": "a", - "mode": "file", - "format": "pickle", - "outdir": "." 
- } -} \ No newline at end of file diff --git a/podpac/core/pipeline/test/test_output.py b/podpac/core/pipeline/test/test_output.py deleted file mode 100644 index 42163ff89..000000000 --- a/podpac/core/pipeline/test/test_output.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import os -import pytest - -import podpac -from podpac.core.algorithm.utility import Arange -from podpac.core.pipeline.output import FileOutput, FTPOutput, S3Output, NoOutput, ImageOutput - -coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30]], dims=["lat", "lon"]) -node = Arange() -node_output = node.eval(coords) - - -class TestNoOutput(object): - def test(self): - output = NoOutput(node=node, name="test") - output.write(node_output, coords) - - -class TestFileOutput(object): - def _test(self, format): - output = FileOutput(node=node, name="test", outdir=".", format=format) - output.write(node_output, coords) - - assert output.path != None - assert os.path.isfile(output.path) - os.remove(output.path) - - def test_pickle(self): - self._test("pickle") - - def test_png(self): - # self._test('png') - - output = FileOutput(node=node, name="test", outdir=".", format="png") - with pytest.raises(NotImplementedError): - output.write(node_output, coords) - - def test_geotif(self): - # self._test('geotif') - - output = FileOutput(node=node, name="test", outdir=".", format="geotif") - with pytest.raises(NotImplementedError): - output.write(node_output, coords) - - -class TestFTPOutput(object): - def test(self): - output = FTPOutput(node=node, name="test", url="none", user="none") - with pytest.raises(NotImplementedError): - output.write(node_output, coords) - - -class TestS3Output(object): - def test(self): - output = S3Output(node=node, name="test", user="none", bucket="none") - with pytest.raises(NotImplementedError): - output.write(node_output, coords) - - -class TestImageOutput(object): - def test(self): - output = ImageOutput(node=node, name="test") - output.write(node_output, coords) - assert output.image is not None diff --git a/podpac/core/pipeline/test/test_parse_pipeline_definition.py b/podpac/core/pipeline/test/test_parse_pipeline_definition.py deleted file mode 100644 index b4cb11818..000000000 --- a/podpac/core/pipeline/test/test_parse_pipeline_definition.py +++ /dev/null @@ -1,848 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import json -import warnings -from collections import OrderedDict - -import numpy as np -import traitlets as tl -import pytest - -import podpac -from podpac.core.pipeline.pipeline import Pipeline, PipelineError, parse_pipeline_definition -from podpac.core.pipeline.output import NoOutput, FTPOutput, S3Output, FileOutput, ImageOutput - - -class TestParsePipelineDefinition(object): - def test_empty(self): - s = "{ }" - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="Pipeline definition requires 'nodes' property"): - parse_pipeline_definition(d) - - def test_no_nodes(self): - s = '{"nodes": { } }' - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="'nodes' property cannot be empty"): - parse_pipeline_definition(d) - - def test_invalid_node(self): - # module does not exist - s = '{"nodes": {"a": {"node": "nonexistent.Arbitrary"} } }' - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="No module found"): - parse_pipeline_definition(d) - - # node does not 
exist in module - s = '{"nodes": {"a": {"node": "core.Nonexistent"} } }' - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="Node 'Nonexistent' not found"): - parse_pipeline_definition(d) - - def test_datasource_source(self): - # basic - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.source == "my_data_string" - - # not required - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - - # incorrect - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "attrs": { - "source": "my_data_string" - } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="The 'source' property cannot be in attrs"): - parse_pipeline_definition(d) - - def test_datasource_lookup_source(self): - # sub-node - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mydata2": { - "node": "data.DataSource", - "lookup_source": "mydata.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.source == "my_data_string" - - # nonexistent node - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mydata2": { - "node": "data.DataSource", - "lookup_source": "nonexistent.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="'mydata2' references nonexistent node/attribute"): - parse_pipeline_definition(d) - - # nonexistent subattr - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mydata2": { - "node": "data.DataSource", - "lookup_source": "mydata.nonexistent.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="'mydata2' references nonexistent node/attribute"): - parse_pipeline_definition(d) - - # nonexistent subsubattr - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mydata2": { - "node": "data.DataSource", - "lookup_source": "double.source.nonexistent" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="'mydata2' references nonexistent node/attribute"): - parse_pipeline_definition(d) - - # in attrs (incorrect) - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "attrs": { - "lookup_source": "my_data_string" - } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="The 'lookup_source' property cannot be in attrs"): - parse_pipeline_definition(d) - - def test_reprojected_source_lookup_source(self): - # source doesn't work - s = """ - { - "nodes": { - "mysource": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "reprojected": { - "node": "data.ReprojectedSource", - "source": "mysource" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(tl.TraitError): - parse_pipeline_definition(d) - - # lookup_source - s = """ - { - "nodes": { - "mysource": { - "node": "data.DataSource", - "source": 
"my_data_string" - }, - "reprojected": { - "node": "data.ReprojectedSource", - "lookup_source": "mysource" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.source - assert output.node.source.source == "my_data_string" - - # lookup_source subattr - s = """ - { - "nodes": { - "mysource": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mean": { - "node": "algorithm.Mean", - "inputs": {"source": "mysource"} - }, - "reprojected": { - "node": "data.ReprojectedSource", - "lookup_source": "mean.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.source - assert output.node.source.source == "my_data_string" - - # nonexistent node/attribute references are tested in test_datasource_lookup_source - - def test_array_source(self): - s = """ - { - "nodes": { - "mysource": { - "node": "data.Array", - "source": [0, 1, 2] - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - np.testing.assert_array_equal(output.node.source, [0, 1, 2]) - - def test_array_lookup_source(self): - # source doesn't work - s = """ - { - "nodes": { - "a": { - "node": "data.Array", - "source": [0, 1, 2] - }, - "b": { - "node": "data.Array", - "source": "a.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(ValueError): - parse_pipeline_definition(d) - - # lookup_source does work - s = """ - { - "nodes": { - "a": { - "node": "data.Array", - "source": [0, 1, 2] - }, - "b": { - "node": "data.Array", - "lookup_source": "a.source" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - np.testing.assert_array_equal(output.node.source, [0, 1, 2]) - - def test_algorithm_inputs(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Insecure evaluation.*") - - # basic - s = """ - { - "nodes": { - "source1": {"node": "algorithm.Arange"}, - "source2": {"node": "algorithm.CoordData"}, - "result": { - "node": "algorithm.Arithmetic", - "inputs": { - "A": "source1", - "B": "source2" - }, - "attrs": { - "eqn": "A + B" - } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - - assert isinstance(output.node.inputs["A"], podpac.algorithm.Arange) - assert isinstance(output.node.inputs["B"], podpac.algorithm.CoordData) - - # sub-node - s = """ - { - "nodes": { - "mysource": {"node": "algorithm.Arange"}, - "mean": { - "node": "algorithm.Mean", - "inputs": {"source": "mysource"} - }, - "double": { - "node": "algorithm.Arithmetic", - "inputs": { "A": "mean.source" }, - "attrs": { "eqn": "2 * A" } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - - assert isinstance(output.node.inputs["A"], podpac.algorithm.Arange) - - # nonexistent node/attribute references are tested in test_datasource_lookup_source - - def test_compositor_sources(self): - # basic - s = """ - { - "nodes": { - "a": {"node": "algorithm.Arange"}, - "b": {"node": "algorithm.CoordData"}, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"] - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output.node.sources[0], podpac.algorithm.Arange) - assert isinstance(output.node.sources[1], 
podpac.algorithm.CoordData) - - # sub-node - s = """ - { - "nodes": { - "source1": {"node": "algorithm.Arange"}, - "mean1": { - "node": "algorithm.Mean", - "inputs": {"source": "source1"} - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["mean1.source", "source1"] - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output.node, podpac.compositor.OrderedCompositor) - assert isinstance(output.node.sources[0], podpac.algorithm.Arange) - assert isinstance(output.node.sources[1], podpac.algorithm.Arange) - - # nonexistent node/attribute references are tested in test_datasource_lookup_source - - def test_datasource_interpolation(self): - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string", - "interpolation": "nearest" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.interpolation == "nearest" - - # not required - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - - # incorrect - s = """ - { - "nodes": { - "mydata": { - "node": "data.DataSource", - "attrs": { - "interpolation": "nearest" - } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="The 'interpolation' property cannot be in attrs"): - parse_pipeline_definition(d) - - def test_compositor_interpolation(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - }, - "b": { - "node": "algorithm.Arange" - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"], - "interpolation": "nearest" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.interpolation == "nearest" - - def test_attrs(self): - import podpac.datalib.smap - - s = """ - { - "nodes": { - "sm": { - "node": "datalib.smap.SMAP", - "attrs": { - "product": "SPL4SMGP" - } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.product == "SPL4SMGP" - - def test_lookup_attrs(self): - # attrs doesn't work - s = """ - { - "nodes": { - "a": { - "node": "algorithm.CoordData", - "attrs": { "coord_name": "lat" } - }, - "b": { - "node": "algorithm.CoordData", - "attrs": { "coord_name": "a.coord_name" } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - with pytest.raises(AssertionError): - assert output.node.coord_name == "lat" - - # but lookup_attrs does - s = """ - { - "nodes": { - "a": { - "node": "algorithm.CoordData", - "attrs": { "coord_name": "lat" } - }, - "b": { - "node": "algorithm.CoordData", - "lookup_attrs": { "coord_name": "a.coord_name" } - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert output.node.coord_name == "lat" - - # NOTE: no nodes currently have a Node as an attr - # # lookup node directly (instead of a sub-attr) - # s = ''' - # { - # "nodes": { - # "mysource": { - # "node": "data.DataSource" - # }, - # "mynode": { - # "node": "MyNode", - # "lookup_attrs": { - # "my_node_attr": "mysource" - # } - # } - # } - # } - # ''' - - # d = json.loads(s, object_pairs_hook=OrderedDict) - # output = parse_pipeline_definition(d) - # assert 
isinstance(output.node.my_node_attr, DataSource) - - # nonexistent node/attribute references are tested in test_datasource_lookup_source - - def test_invalid_property(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange", - "invalid_property": "value" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="node 'a' has unexpected property"): - parse_pipeline_definition(d) - - def test_plugin(self): - pass - - def test_parse_output_none(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": {"node": "a", "mode": "none"} - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, NoOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - - def test_parse_output_file(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "node": "a", - "mode": "file", - "format": "pickle", - "outdir": "my_directory" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, FileOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - assert output.format == "pickle" - assert output.outdir == "my_directory" - - def test_parse_output_s3(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "node": "a", - "mode": "s3", - "user": "my_user", - "bucket": "my_bucket" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, S3Output) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - assert output.user == "my_user" - assert output.bucket == "my_bucket" - - def test_parse_output_ftp(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "node": "a", - "mode": "ftp", - "url": "my_url", - "user": "my_user" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, FTPOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - assert output.user == "my_user" - assert output.url == "my_url" - # TODO password - - def test_parse_output_image(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "node": "a", - "mode": "image" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, ImageOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - - def test_parse_output_invalid_mode(self): - # invalid mode - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": {"mode": "nonexistent_mode"} - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="output has unexpected mode"): - parse_pipeline_definition(d) - - def test_parse_output_implicit_mode(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": {"node": "a"} - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, NoOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - - def test_parse_output_nonexistent_node(self): - s = """ - { - 
"nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "node": "b", - "mode": "file", - "format": "pickle", - "outdir": "my_directory" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="output' references nonexistent node"): - parse_pipeline_definition(d) - - def test_parse_output_implicit_node(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Insecure evaluation.*") - - s = """ - { - "nodes": { - "source1": {"node": "algorithm.Arange"}, - "source2": {"node": "algorithm.Arange"}, - "result": { - "node": "algorithm.Arithmetic", - "inputs": { - "A": "source1", - "B": "source2" - }, - "attrs": { - "eqn": "A + B" - } - } - }, - "pipeline_output": { - "mode": "none" - } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output.node, podpac.algorithm.Arithmetic) - - def test_parse_output_implicit(self): - s = """ - { - "nodes": {"a": {"node": "algorithm.Arange"} } - } - """ - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, NoOutput) - assert isinstance(output.node, podpac.algorithm.Arange) - assert output.name == "a" - - def test_parse_custom_output(self): - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "podpac.core.pipeline.output", - "output": "ImageOutput" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, ImageOutput) - - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "podpac", - "output": "core.pipeline.output.ImageOutput" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - output = parse_pipeline_definition(d) - assert isinstance(output, ImageOutput) - - def test_parse_custom_output_invalid(self): - # no module - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "nonexistent_module", - "output": "arbitrary" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="No module found"): - parse_pipeline_definition(d) - - # module okay, but no such class - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "podpac.core.pipeline.output", - "output": "Nonexistent" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="Output 'Nonexistent' not found"): - parse_pipeline_definition(d) - - # module okay, class found, could not create - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "numpy", - "output": "ndarray" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - with pytest.raises(PipelineError, match="Could not create custom output"): - parse_pipeline_definition(d) - - # module okay, class found, incorrect type - s = """ { - "nodes": {"a": {"node": "algorithm.Arange"} }, - "pipeline_output": { - "plugin": "numpy", - "output": "array" - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - m = "Custom output '.*' must subclass 'podpac.core.pipeline.output.Output'" - with pytest.raises(PipelineError, match=m): - parse_pipeline_definition(d) diff --git a/podpac/core/pipeline/test/test_pipeline.py b/podpac/core/pipeline/test/test_pipeline.py deleted file mode 100644 index 195d35e85..000000000 --- 
a/podpac/core/pipeline/test/test_pipeline.py +++ /dev/null @@ -1,174 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import os -import json -from collections import OrderedDict -import warnings - -import numpy as np -import pytest - -import podpac -from podpac.core.algorithm.utility import Arange -from podpac.core.pipeline.pipeline import Pipeline, PipelineError -from podpac.core.pipeline.output import FileOutput - -coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30]], dims=["lat", "lon"]) -node = Arange() -node.eval(coords) - - -class TestPipeline(object): - def test_init_path(self): - path = os.path.join(os.path.abspath(podpac.__path__[0]), "core", "pipeline", "test", "test.json") - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(path=path) - - assert pipeline.json - assert pipeline.definition - assert pipeline.pipeline_output - - def test_init_json(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - } - } - } - """ - - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s) - assert pipeline.json - assert pipeline.definition - assert pipeline.pipeline_output - - def test_init_definition(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - } - } - } - """ - - d = json.loads(s, object_pairs_hook=OrderedDict) - - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(definition=d) - assert pipeline.json - assert pipeline.definition - assert pipeline.pipeline_output - - def test_init_error(self): - pass - - def test_eval(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - } - } - } - """ - - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s) - pipeline.eval(coords) - - pipeline.units - pipeline.dtype - pipeline.cache_ctrl - pipeline.style - - def test_eval_output(self): - path = os.path.join(os.path.abspath(podpac.__path__[0]), "core", "pipeline", "test") - - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - } - }, - "pipeline_output": { - "node": "a", - "mode": "file", - "format": "pickle", - "outdir": "." - } - } - """ - - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s) - pipeline.eval(coords) - assert pipeline.pipeline_output.path is not None - assert os.path.isfile(pipeline.pipeline_output.path) - os.remove(pipeline.pipeline_output.path) - - def test_eval_no_output(self): - path = os.path.join(os.path.abspath(podpac.__path__[0]), "core", "pipeline", "test") - - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - } - }, - "pipeline_output": { - "node": "a", - "mode": "file", - "format": "pickle", - "outdir": "." 
- } - } - """ - - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s, do_write_output=False) - pipeline.eval(coords) - if pipeline.pipeline_output.path is not None and os.path.isfile(pipeline.pipeline_output.path): - os.remove(pipeline.pipeline_output.path) - assert pipeline.pipeline_output.path is None - - def test_debuggable(self): - s = """ - { - "nodes": { - "a": { - "node": "algorithm.Arange" - }, - "mean": { - "node": "algorithm.SpatialConvolution", - "inputs": {"source": "a"}, - "attrs": {"kernel_type": "mean,3"} - }, - "c": { - "node": "algorithm.Arithmetic", - "inputs": {"A": "a", "B": "mean"}, - "attrs": {"eqn": "a-b"} - } - } - } - """ - - with podpac.settings, warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Insecure evaluation.*") - - podpac.core.settings.settings["DEBUG"] = False - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s) - assert pipeline.node.inputs["A"] is pipeline.node.inputs["B"].source - - podpac.core.settings.settings["DEBUG"] = True - with pytest.warns(DeprecationWarning): - pipeline = Pipeline(json=s) - assert pipeline.node.inputs["A"] is not pipeline.node.inputs["B"].source diff --git a/podpac/core/pipeline/test/test_pipeline_imports.py b/podpac/core/pipeline/test/test_pipeline_imports.py deleted file mode 100644 index d6534d6eb..000000000 --- a/podpac/core/pipeline/test/test_pipeline_imports.py +++ /dev/null @@ -1,3 +0,0 @@ -def test_import(): - from podpac.core.pipeline import Pipeline - from podpac.core.pipeline import PipelineError diff --git a/podpac/core/settings.py b/podpac/core/settings.py index ed378ce13..8fbd44353 100644 --- a/podpac/core/settings.py +++ b/podpac/core/settings.py @@ -20,10 +20,12 @@ DEFAULT_SETTINGS = { # podpac core settings "DEBUG": False, # This flag currently sets self._output on nodes - "ROOT_PATH": os.path.join(os.path.expanduser("~"), ".podpac"), + "ROOT_PATH": os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~")), ".config", "podpac"), "AUTOSAVE_SETTINGS": False, "LOG_TO_FILE": False, - "LOG_FILE_PATH": os.path.join(os.path.expanduser("~"), ".podpac", "logs", "podpac.log"), + "LOG_FILE_PATH": os.path.join( + os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~")), "podpac", "logs", "podpac.log" + ), "MULTITHREADING": False, "N_THREADS": 8, "CHUNK_SIZE": None, # Size of chunks for parallel processing or large arrays that do not fit in memory @@ -33,7 +35,8 @@ "UNSAFE_EVAL_HASH": uuid.uuid4().hex, # unique id for running unsafe evaluations # cache "DEFAULT_CACHE": ["ram"], - "CACHE_OUTPUT_DEFAULT": True, + "CACHE_DATASOURCE_OUTPUT_DEFAULT": True, + "CACHE_NODE_OUTPUT_DEFAULT": False, "RAM_CACHE_MAX_BYTES": 1e9, # ~1GB "DISK_CACHE_MAX_BYTES": 10e9, # ~10GB "S3_CACHE_MAX_BYTES": 10e9, # ~10GB @@ -65,17 +68,18 @@ class PodpacSettings(dict): Podpac settings are persistently stored in a ``settings.json`` file created at runtime. By default, podpac will create a settings json file in the user's - home directory (``~/.podpac/settings.json``) when first run. + home directory (``~/.config/podpac/settings.json`` or ``$XDG_CONFIG_HOME/podpac/settings.json``) when first run. Default settings can be overridden or extended by: - * editing the ``settings.json`` file in the home directory (i.e. ``~/.podpac/settings.json``) + * editing the ``settings.json`` file in the settings directory (i.e. ``~/.config/podpac/settings.json`` or + ``$XDG_CONFIG_HOME/podpac/settings.json``) * creating a ``settings.json`` in the current working directory (i.e. 
``./settings.json``) If ``settings.json`` files exist in multiple places, podpac will load settings in the following order, - overwriting previously loaded settings in the process: - * podpac settings defaults - * home directory settings (``~/.podpac/settings.json``) - * current working directory settings (``./settings.json``) + overwriting previously loaded settings in the process (i.e. the highest numbered settings file is preferred): + 1. podpac settings defaults + 2. settings directory (``~/.config/podpac/settings.json`` or ``$XDG_CONFIG_HOME/podpac/settings.json``) + 3. current working directory settings (``./settings.json``) :attr:`settings.settings_path` shows the path of the last loaded settings file (e.g. the active settings file). To persistently update the active settings file as changes are made at runtime, @@ -106,8 +110,10 @@ class PodpacSettings(dict): Notification email for when AWS usage reaches 80% of the `AWS_BUDGET_AMOUNT` DEFAULT_CACHE : list Defines a default list of cache stores in priority order. Defaults to `['ram']`. - CACHE_OUTPUT_DEFAULT : bool + CACHE_NODE_OUTPUT_DEFAULT : bool Default value for node ``cache_output`` trait. If True, the outputs of nodes (eval) will be automatically cached. + CACHE_DATASOURCE_OUTPUT_DEFAULT : bool + Default value for DataSource nodes' ``cache_output`` trait. If True, the outputs of DataSource nodes (eval) will be automatically cached. RAM_CACHE_MAX_BYTES : int Maximum RAM cache size in bytes. Note, for RAM cache only, the limit is applied to the total amount of RAM used by the python process; @@ -124,7 +130,9 @@ class PodpacSettings(dict): Defaults to ``10e9`` (~10G). Set to `None` explicitly for no limit. DISK_CACHE_DIR : str - Subdirectory to use for the disk cache. Defaults to ``'cache'`` in the podpac root directory. + Subdirectory to use for the disk cache. Defaults to ``'cache'`` in the podpac root directory. + Use settings.cache_path to access this setting (this property looks for the environment variable + `XDG_CACHE_HOME` to adjust the location of the cache directory) S3_CACHE_DIR : str Subdirectory to use for S3 cache (within the specified S3 bucket). Defaults to ``'cache'``. 
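To make the settings behavior described in this docstring concrete, a minimal sketch follows (assuming the ``podpac.settings`` interface added in this patch; the actual paths depend on ``XDG_CONFIG_HOME`` and ``XDG_CACHE_HOME`` in your environment):

import podpac

# the active settings file and the resolved disk cache directory
print(podpac.settings.settings_path)
print(podpac.settings.cache_path)  # honors XDG_CACHE_HOME when DISK_CACHE_DIR is relative

# node outputs are no longer cached by default, while DataSource outputs still are;
# opt back in globally if desired
podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = True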
RAM_CACHE_ENABLED: bool @@ -263,6 +271,22 @@ def settings_path(self): """ return self._settings_filepath + @property + def cache_path(self): + """Path to the cache + + Returns + ------- + str + Path to where the cache is stored + """ + if os.path.isabs(settings["DISK_CACHE_DIR"]): + path = settings["DISK_CACHE_DIR"] + else: + path = os.path.join(os.environ.get("XDG_CACHE_HOME", settings["ROOT_PATH"]), settings["DISK_CACHE_DIR"]) + + return path + @property def defaults(self): """ diff --git a/podpac/core/test/test_authentication.py b/podpac/core/test/test_authentication.py index fe96f7bb4..1f8fd1902 100644 --- a/podpac/core/test/test_authentication.py +++ b/podpac/core/test/test_authentication.py @@ -1,39 +1,149 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - import pytest -import sys -from io import StringIO +import requests +import traitlets as tl +import s3fs -import podpac.core.authentication as auth +from podpac import settings, Node +from podpac.core.authentication import RequestsSessionMixin, S3Mixin, set_credentials class TestAuthentication(object): - def test_earth_data_session_update(self): - eds = auth.EarthDataSession() - eds.update_login("testuser", "testpassword") - eds = auth.EarthDataSession() - assert eds.auth == ("testuser", "testpassword") - - def test_earth_data_session_update_input(self): - eds = auth.EarthDataSession() - auth.input = lambda x: "testuser2" - auth.getpass.getpass = lambda: "testpass2" - eds.update_login() - eds = auth.EarthDataSession() - assert eds.auth == ("testuser2", "testpass2") - - def test_earth_data_session_rebuild_auth(self): - eds = auth.EarthDataSession() - - class Dum(object): - pass - - prepared_request = Dum() - prepared_request.headers = {"Authorization": 0} - prepared_request.url = "https://example.com" - - response = Dum() - response.request = Dum() - response.request.url = "https://example2.com" - - eds.rebuild_auth(prepared_request, response) + def test_set_credentials(self): + + with settings: + if "username@test.com" in settings: + del settings["username@test.com"] + + if "password@test.com" in settings: + del settings["password@test.com"] + + # require hostname + with pytest.raises(TypeError): + set_credentials() + + with pytest.raises(ValueError): + set_credentials(None, username="test", password="test") + + with pytest.raises(ValueError): + set_credentials("", username="test", password="test") + + # make sure these are empty at first + assert not settings["username@test.com"] + assert not settings["password@test.com"] + + # test input/getpass + # TODO: how do you test this? 
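For reference, a minimal usage sketch of the credential helpers exercised by these tests (``example.com`` is a placeholder hostname):

from podpac import settings
from podpac.core.authentication import set_credentials

# credentials are stored in settings, keyed by hostname
set_credentials(hostname="example.com", username="user", password="pass")
assert settings["username@example.com"] == "user"
assert settings["password@example.com"] == "pass"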
+ + # set both username and password + set_credentials(hostname="test.com", username="testuser", password="testpass") + assert settings["username@test.com"] == "testuser" + assert settings["password@test.com"] == "testpass" + + # set username only + set_credentials(hostname="test.com", username="testuser2") + assert settings["username@test.com"] == "testuser2" + assert settings["password@test.com"] == "testpass" + + # set password only + set_credentials(hostname="test.com", password="testpass3") + assert settings["username@test.com"] == "testuser2" + assert settings["password@test.com"] == "testpass3" + + # don't do anything if neither is provided, but the settings exist + set_credentials(hostname="test.com") + assert settings["username@test.com"] == "testuser2" + assert settings["password@test.com"] == "testpass3" + + +# dummy class mixing in RequestsSession with hostname +class SomeNodeWithHostname(RequestsSessionMixin): + hostname = "myurl.org" + + +class SomeNode(RequestsSessionMixin): + pass + + +class TestRequestsSessionMixin(object): + def test_hostname(self): + node = SomeNode(hostname="someurl.org") + assert node.hostname == "someurl.org" + + # use class that implements + node = SomeNodeWithHostname() + assert node.hostname == "myurl.org" + + def test_property_value_errors(self): + node = SomeNode(hostname="propertyerrors.com") + + with pytest.raises(ValueError, match="set_credentials"): + u = node.username + + with pytest.raises(ValueError, match="set_credentials"): + p = node.password + + def test_set_credentials(self): + with settings: + node = SomeNode(hostname="setcredentials.com") + node.set_credentials(username="testuser", password="testpass") + assert settings["username@setcredentials.com"] == "testuser" + assert settings["password@setcredentials.com"] == "testpass" + + def test_property_values(self): + with settings: + node = SomeNode(hostname="propertyvalues.com") + node.set_credentials(username="testuser2", password="testpass2") + + assert node.username == "testuser2" + assert node.password == "testpass2" + + def test_session(self): + with settings: + node = SomeNode(hostname="session.net") + node.set_credentials(username="testuser", password="testpass") + + assert node.session + assert node.session.auth == ("testuser", "testpass") + assert isinstance(node.session, requests.Session) + + def test_auth_required(self): + with settings: + with pytest.raises(tl.TraitError): + node = SomeNode(hostname="auth.com", auth_required="true") + + # no auth + node = SomeNode(hostname="auth.com") + assert node.session + assert isinstance(node.session, requests.Session) + with pytest.raises(AttributeError): + node.auth + + # auth required + if "username@auth2.com" in settings: + del settings["username@auth2.com"] + + if "password@auth2.com" in settings: + del settings["password@auth2.com"] + + node = SomeNode(hostname="auth2.com", auth_required=True) + with pytest.raises(ValueError): + s = node.session + print(s) + + node.set_credentials(username="testuser", password="testpass") + assert node.session + assert isinstance(node.session, requests.Session) + + +class TestS3Mixin(object): + class S3Node(S3Mixin, Node): + pass + + def test_anon(self): + node = self.S3Node(anon=True) + assert isinstance(node.s3, s3fs.S3FileSystem) + + @pytest.mark.aws + def test_auth(self): + node = self.S3Node() + assert isinstance(node.s3, s3fs.S3FileSystem) diff --git a/podpac/core/test/test_compositor.py b/podpac/core/test/test_compositor.py deleted file mode 100644 index 453a96d51..000000000 --- 
a/podpac/core/test/test_compositor.py +++ /dev/null @@ -1,400 +0,0 @@ -import warnings - -import pytest -import numpy as np - -import podpac -from podpac.core.data.datasource import DataSource -from podpac.core.data.array_source import Array -from podpac.compositor import Compositor, OrderedCompositor - -COORDS = podpac.Coordinates( - [podpac.clinspace(45, 0, 16), podpac.clinspace(-70, -65, 16), podpac.clinspace(0, 1, 2)], - dims=["lat", "lon", "time"], -) -LON, LAT, TIME = np.meshgrid(COORDS["lon"].coordinates, COORDS["lat"].coordinates, COORDS["time"].coordinates) - -ARRAY_LAT = Array(source=LAT.astype(float), native_coordinates=COORDS, interpolation="bilinear") -ARRAY_LON = Array(source=LON.astype(float), native_coordinates=COORDS, interpolation="bilinear") -ARRAY_TIME = Array(source=TIME.astype(float), native_coordinates=COORDS, interpolation="bilinear") - -MULTI_0_XY = Array(source=np.full(COORDS.shape + (2,), 0), native_coordinates=COORDS, outputs=["x", "y"]) -MULTI_1_XY = Array(source=np.full(COORDS.shape + (2,), 1), native_coordinates=COORDS, outputs=["x", "y"]) -MULTI_4_YX = Array(source=np.full(COORDS.shape + (2,), 4), native_coordinates=COORDS, outputs=["y", "x"]) -MULTI_2_X = Array(source=np.full(COORDS.shape + (1,), 2), native_coordinates=COORDS, outputs=["x"]) -MULTI_3_Z = Array(source=np.full(COORDS.shape + (1,), 3), native_coordinates=COORDS, outputs=["z"]) - - -class TestCompositor(object): - def test_init(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - repr(node) - - def test_shared_coordinates(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - - with pytest.raises(NotImplementedError): - node.get_shared_coordinates() - - with pytest.raises(NotImplementedError): - node.shared_coordinates() - - def test_source_coordinates(self): - # none (default) - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - assert node.source_coordinates is None - assert node.get_source_coordinates() is None - - # unstacked - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], - source_coordinates=podpac.Coordinates([[0, 1]], dims=["time"]), - ) - - # stacked - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], - source_coordinates=podpac.Coordinates([[[0, 1], [10, 20]]], dims=["time_alt"]), - ) - - # invalid size - with pytest.raises(ValueError, match="Invalid source_coordinates, source and source_coordinates size mismatch"): - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], - source_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["time"]), - ) - - with pytest.raises(ValueError, match="Invalid source_coordinates, source and source_coordinates size mismatch"): - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], - source_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["time"]), - ) - - # invalid ndims - with pytest.raises(ValueError, match="Invalid source_coordinates"): - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], - source_coordinates=podpac.Coordinates([[0, 1], [10, 20]], dims=["time", "alt"]), - ) - - def test_select_sources(self): - source_coords = podpac.Coordinates([[0, 10]], ["time"]) - node = podpac.compositor.Compositor( - sources=[podpac.algorithm.Arange(), podpac.algorithm.SinCoords()], source_coordinates=source_coords - ) - - selected = 
node.select_sources(source_coords) - assert len(selected) == 2 - assert selected[0] is node.sources[0] - assert selected[1] is node.sources[1] - - coords = podpac.Coordinates([podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 0], ["lat", "lon", "time"]) - selected = node.select_sources(coords) - assert len(selected) == 1 - assert selected[0] is node.sources[0] - - coords = podpac.Coordinates( - [podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 10], ["lat", "lon", "time"] - ) - selected = node.select_sources(coords) - assert len(selected) == 1 - assert selected[0] is node.sources[1] - - coords = podpac.Coordinates( - [podpac.clinspace(0, 1, 10), podpac.clinspace(0, 1, 11), 100], ["lat", "lon", "time"] - ) - selected = node.select_sources(coords) - assert len(selected) == 0 - - def test_iteroutputs_interpolation(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME], interpolation="nearest") - outputs = node.iteroutputs(COORDS) - for output in outputs: - pass - assert node.sources[0].interpolation == "nearest" - assert node.sources[1].interpolation == "nearest" - assert node.sources[2].interpolation == "nearest" - assert ARRAY_LAT.interpolation == "bilinear" - assert ARRAY_LON.interpolation == "bilinear" - assert ARRAY_TIME.interpolation == "bilinear" - - # if no interpolation is provided, keep the source interpolation values - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - outputs = node.iteroutputs(COORDS) - for output in outputs: - pass - assert node.sources[0].interpolation == "bilinear" - assert node.sources[1].interpolation == "bilinear" - assert node.sources[2].interpolation == "bilinear" - - def test_iteroutputs_empty(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - outputs = node.iteroutputs(podpac.Coordinates([-1, -1, -1], dims=["lat", "lon", "time"])) - np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) - np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) - np.testing.assert_array_equal(next(outputs), [[[np.nan]]]) - with pytest.raises(StopIteration): - next(outputs) - - def test_iteroutputs_singlethreaded(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = False - - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - outputs = node.iteroutputs(COORDS) - np.testing.assert_array_equal(next(outputs), LAT) - np.testing.assert_array_equal(next(outputs), LON) - np.testing.assert_array_equal(next(outputs), TIME) - with pytest.raises(StopIteration): - next(outputs) - assert node._multi_threaded == False - - def test_iteroutputs_multithreaded(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = True - podpac.settings["N_THREADS"] = 8 - - n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - outputs = node.iteroutputs(COORDS) - np.testing.assert_array_equal(next(outputs), LAT) - np.testing.assert_array_equal(next(outputs), LON) - np.testing.assert_array_equal(next(outputs), TIME) - with pytest.raises(StopIteration): - next(outputs) - assert node._multi_threaded == True - assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before - - def test_iteroutputs_n_threads_1(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = True - podpac.settings["N_THREADS"] = 1 - - n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - 
outputs = node.iteroutputs(COORDS) - np.testing.assert_array_equal(next(outputs), LAT) - np.testing.assert_array_equal(next(outputs), LON) - np.testing.assert_array_equal(next(outputs), TIME) - with pytest.raises(StopIteration): - next(outputs) - assert node._multi_threaded == False - assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before - - def test_composite(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - with pytest.raises(NotImplementedError): - node.composite(COORDS, iter(())) - - def test_eval(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - with pytest.raises(NotImplementedError): - node.eval(COORDS) - - class MockComposite(Compositor): - def composite(self, coordinates, outputs, result=None): - return next(outputs) - - node = MockComposite(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - output = node.eval(COORDS) - np.testing.assert_array_equal(output, LAT) - - def test_find_coordinates(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - - with pytest.raises(NotImplementedError): - node.find_coordinates() - - def test_base_definition(self): - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - d = node.base_definition - assert isinstance(d, dict) - assert "sources" in d - assert "interpolation" in d - - def test_outputs(self): - # standard single-output - node = Compositor(sources=[ARRAY_LAT, ARRAY_LON, ARRAY_TIME]) - assert node.outputs is None - - # multi-output - node = Compositor(sources=[MULTI_0_XY, MULTI_1_XY]) - assert node.outputs == ["x", "y"] - - node = Compositor(sources=[MULTI_0_XY, MULTI_3_Z]) - assert node.outputs == ["x", "y", "z"] - - node = Compositor(sources=[MULTI_3_Z, MULTI_0_XY]) - assert node.outputs == ["z", "x", "y"] - - node = Compositor(sources=[MULTI_0_XY, MULTI_4_YX]) - assert node.outputs == ["x", "y"] - - # multi-output, with strict source outputs checking - node = Compositor(sources=[MULTI_0_XY, MULTI_1_XY], strict_source_outputs=True) - assert node.outputs == ["x", "y"] - - with pytest.raises(ValueError, match="Source outputs mismatch"): - node = Compositor(sources=[MULTI_0_XY, MULTI_2_X], strict_source_outputs=True) - - with pytest.raises(ValueError, match="Source outputs mismatch"): - node = Compositor(sources=[MULTI_0_XY, MULTI_3_Z], strict_source_outputs=True) - - with pytest.raises(ValueError, match="Source outputs mismatch"): - node = Compositor(sources=[MULTI_0_XY, MULTI_4_YX], strict_source_outputs=True) - - # mixed - with pytest.raises(ValueError, match="Cannot composite standard sources with multi-output sources."): - node = Compositor(sources=[MULTI_2_X, ARRAY_LAT]) - - -class TestOrderedCompositor(object): - def test_composite(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = False - - acoords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) - asource = np.ones(acoords.shape) - asource[0, :] = np.nan - a = Array(source=asource, native_coordinates=acoords) - - bcoords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) - bsource = np.zeros(bcoords.shape) - bsource[:, 0] = np.nan - b = Array(source=bsource, native_coordinates=bcoords) - - coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40, 50]], dims=["lat", "lon"]) - - node = OrderedCompositor(sources=[a, b], interpolation="bilinear") - expected = np.array( - [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 1.0, 1.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] - ) - np.testing.assert_allclose(node.eval(coords), 
expected, equal_nan=True) - - node = OrderedCompositor(sources=[b, a], interpolation="bilinear") - expected = np.array( - [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 0.0, 0.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] - ) - np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) - - def test_composite_multithreaded(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = True - podpac.settings["N_THREADS"] = 8 - - acoords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) - asource = np.ones(acoords.shape) - asource[0, :] = np.nan - a = Array(source=asource, native_coordinates=acoords) - - bcoords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40]], dims=["lat", "lon"]) - bsource = np.zeros(bcoords.shape) - bsource[:, 0] = np.nan - b = Array(source=bsource, native_coordinates=bcoords) - - coords = podpac.Coordinates([[0, 1, 2], [10, 20, 30, 40, 50]], dims=["lat", "lon"]) - - node = OrderedCompositor(sources=[a, b], interpolation="bilinear") - expected = np.array( - [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 1.0, 1.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] - ) - np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) - - node = OrderedCompositor(sources=[b, a], interpolation="bilinear") - expected = np.array( - [[np.nan, 0.0, 0.0, 0.0, np.nan], [1.0, 0.0, 0.0, 0.0, np.nan], [np.nan, 0.0, 0.0, 0.0, np.nan]] - ) - np.testing.assert_allclose(node.eval(coords), expected, equal_nan=True) - - def test_composite_short_circuit(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = False - podpac.settings["DEBUG"] = True - - coords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) - a = Array(source=np.ones(coords.shape), native_coordinates=coords) - b = Array(source=np.zeros(coords.shape), native_coordinates=coords) - node = OrderedCompositor(sources=[a, b], interpolation="bilinear") - output = node.eval(coords) - np.testing.assert_array_equal(output, a.source) - assert node.sources[0]._output is not None - assert node.sources[1]._output is None - - def test_composite_short_circuit_multithreaded(self): - with podpac.settings: - podpac.settings["MULTITHREADING"] = True - podpac.settings["N_THREADS"] = 8 - podpac.settings["DEBUG"] = True - - coords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) - n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used - a = Array(source=np.ones(coords.shape), native_coordinates=coords) - b = Array(source=np.zeros(coords.shape), native_coordinates=coords) - node = OrderedCompositor(sources=[a, b], interpolation="bilinear") - output = node.eval(coords) - np.testing.assert_array_equal(output, a.source) - assert node._multi_threaded == True - assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before - - def test_composite_into_result(self): - coords = podpac.Coordinates([[0, 1], [10, 20, 30]], dims=["lat", "lon"]) - a = Array(source=np.ones(coords.shape), native_coordinates=coords) - b = Array(source=np.zeros(coords.shape), native_coordinates=coords) - node = OrderedCompositor(sources=[a, b], interpolation="bilinear") - result = node.create_output_array(coords, data=np.random.random(coords.shape)) - output = node.eval(coords, output=result) - np.testing.assert_array_equal(output, a.source) - np.testing.assert_array_equal(result, a.source) - - def test_composite_multiple_outputs(self): - node = OrderedCompositor(sources=[MULTI_0_XY, MULTI_1_XY]) - output = node.eval(COORDS) - assert output.dims 
== ("lat", "lon", "time", "output") - np.testing.assert_array_equal(output["output"], ["x", "y"]) - np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 0)) - np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) - - node = OrderedCompositor(sources=[MULTI_1_XY, MULTI_0_XY], strict_source_outputs=True) - output = node.eval(COORDS) - assert output.dims == ("lat", "lon", "time", "output") - np.testing.assert_array_equal(output["output"], ["x", "y"]) - np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 1)) - np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 1)) - - def test_composite_combine_multiple_outputs(self): - node = OrderedCompositor(sources=[MULTI_0_XY, MULTI_1_XY, MULTI_2_X, MULTI_3_Z]) - output = node.eval(COORDS) - assert output.dims == ("lat", "lon", "time", "output") - np.testing.assert_array_equal(output["output"], ["x", "y", "z"]) - np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 0)) - np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) - np.testing.assert_array_equal(output.sel(output="z"), np.full(COORDS.shape, 3)) - - node = OrderedCompositor(sources=[MULTI_3_Z, MULTI_2_X, MULTI_0_XY, MULTI_1_XY]) - output = node.eval(COORDS) - assert output.dims == ("lat", "lon", "time", "output") - np.testing.assert_array_equal(output["output"], ["z", "x", "y"]) - np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 2)) - np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 0)) - np.testing.assert_array_equal(output.sel(output="z"), np.full(COORDS.shape, 3)) - - node = OrderedCompositor(sources=[MULTI_2_X, MULTI_4_YX]) - output = node.eval(COORDS) - assert output.dims == ("lat", "lon", "time", "output") - np.testing.assert_array_equal(output["output"], ["x", "y"]) - np.testing.assert_array_equal(output.sel(output="x"), np.full(COORDS.shape, 2)) - np.testing.assert_array_equal(output.sel(output="y"), np.full(COORDS.shape, 4)) - - def test_composite_stacked_unstacked(self): - anative = podpac.Coordinates([podpac.clinspace((0, 1), (1, 2), size=3)], dims=["lat_lon"]) - bnative = podpac.Coordinates([podpac.clinspace(-2, 3, 3), podpac.clinspace(-1, 4, 3)], dims=["lat", "lon"]) - a = Array(source=np.random.rand(3), native_coordinates=anative) - b = Array(source=np.random.rand(3, 3) + 2, native_coordinates=bnative) - - coords = podpac.Coordinates([podpac.clinspace(-3, 4, 32), podpac.clinspace(-2, 5, 32)], dims=["lat", "lon"]) - - node = OrderedCompositor(sources=np.array([a, b]), interpolation="nearest") - o = node.eval(coords) - # Check that both data sources are being used in the interpolation - assert np.any(o.data >= 2) - assert np.any(o.data <= 1) diff --git a/podpac/core/test/test_node.py b/podpac/core/test/test_node.py index 32678f27c..c9c426b49 100644 --- a/podpac/core/test/test_node.py +++ b/podpac/core/test/test_node.py @@ -2,8 +2,8 @@ import os import json -import six import warnings +import tempfile from collections import OrderedDict from copy import deepcopy @@ -12,6 +12,7 @@ except: # Python 2.7 import urlparse as urllib +import six import pytest import numpy as np import xarray as xr @@ -23,27 +24,153 @@ import podpac from podpac.core import common_test_utils as ctu -from podpac.core.utils import ArrayTrait +from podpac.core.utils import ArrayTrait, NodeTrait from podpac.core.units import UnitsDataArray from podpac.core.style import Style -from podpac.core.cache import CacheCtrl, 
RamCacheStore -from podpac.core.node import Node, NodeException +from podpac.core.cache import CacheCtrl, RamCacheStore, DiskCacheStore +from podpac.core.node import Node, NodeException, NodeDefinitionError from podpac.core.node import node_eval +from podpac.core.node import NoCacheMixin, DiskCacheMixin class TestNode(object): - def test_eval_not_implemented(self): + def test_style(self): + node = Node() + assert isinstance(node.style, Style) + + def test_units(self): + node = Node(units="meters") + + with pytest.raises(UndefinedUnitError): + Node(units="abc") + + def test_outputs(self): + node = Node() + assert node.outputs is None + + node = Node(outputs=["a", "b"]) + assert node.outputs == ["a", "b"] + + def test_output(self): + node = Node() + assert node.output is None + + node = Node(outputs=["a", "b"]) + assert node.output is None + + node = Node(outputs=["a", "b"], output="b") + assert node.output == "b" + + # must be one of the outputs + with pytest.raises(ValueError, match="Invalid output"): + node = Node(outputs=["a", "b"], output="other") + + # only valid for multiple-output nodes + with pytest.raises(TypeError, match="Invalid output"): + node = Node(output="other") + + def test_cache_output(self): + with podpac.settings: + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = False + node = Node() + assert not node.cache_output + + podpac.settings["CACHE_NODE_OUTPUT_DEFAULT"] = True + node = Node() + assert node.cache_output + + def test_cache_ctrl(self): + # settings + with podpac.settings: + podpac.settings["DEFAULT_CACHE"] = ["ram"] + node = Node() + assert node.cache_ctrl is not None + assert len(node.cache_ctrl._cache_stores) == 1 + assert isinstance(node.cache_ctrl._cache_stores[0], RamCacheStore) + + podpac.settings["DEFAULT_CACHE"] = ["ram", "disk"] + node = Node() + assert node.cache_ctrl is not None + assert len(node.cache_ctrl._cache_stores) == 2 + assert isinstance(node.cache_ctrl._cache_stores[0], RamCacheStore) + assert isinstance(node.cache_ctrl._cache_stores[1], DiskCacheStore) + + # specify + node = Node(cache_ctrl=["ram"]) + assert node.cache_ctrl is not None + assert len(node.cache_ctrl._cache_stores) == 1 + assert isinstance(node.cache_ctrl._cache_stores[0], RamCacheStore) + + node = Node(cache_ctrl=["ram", "disk"]) + assert node.cache_ctrl is not None + assert len(node.cache_ctrl._cache_stores) == 2 + assert isinstance(node.cache_ctrl._cache_stores[0], RamCacheStore) + assert isinstance(node.cache_ctrl._cache_stores[1], DiskCacheStore) + + def test_tagged_attr_readonly(self): + class MyNode(Node): + my_attr = tl.Any().tag(attr=True) + + with podpac.settings: + podpac.settings["DEBUG"] = False + node = MyNode() + assert node.traits()["my_attr"].read_only + + podpac.settings["DEBUG"] = True + node = MyNode() + assert not node.traits()["my_attr"].read_only + + def test_trait_is_defined(self): + node = Node() + assert node.trait_is_defined("units") + + def test_init(self): + class MyNode(Node): + init_run = False + + def init(self): + super(MyNode, self).init() + self.init_run = True + + node = MyNode() + assert node.init_run + + def test_attrs(self): + class MyNode(Node): + my_attr = tl.Any().tag(attr=True) + my_trait = tl.Any() + + n = MyNode() + assert "my_attr" in n.attrs + assert "my_trait" not in n.attrs + + def test_repr(self): n = Node() - with pytest.raises(NotImplementedError): - n.eval(None) + repr(n) - with pytest.raises(NotImplementedError): - n.eval(None, output=None) + n = Node(outputs=["a", "b"]) + repr(n) + assert "outputs=" in repr(n) + assert 
"output=" not in repr(n) - def test_find_coordinates_not_implemented(self): + n = Node(outputs=["a", "b"], output="a") + repr(n) + assert "outputs=" not in repr(n) + assert "output=" in repr(n) + + def test_str(self): n = Node() - with pytest.raises(NotImplementedError): - n.find_coordinates() + str(n) + + n = Node(outputs=["a", "b"]) + str(n) + assert "outputs=" in str(n) + assert "output=" not in str(n) + + n = Node(outputs=["a", "b"], output="a") + str(n) + assert "outputs=" not in str(n) + assert "output=" in str(n) def test_eval_group(self): class MyNode(Node): @@ -54,8 +181,8 @@ def eval(self, coordinates, output=None): c2 = podpac.Coordinates([[10, 11], [10, 11, 12]], dims=["lat", "lon"]) g = podpac.coordinates.GroupCoordinates([c1, c2]) - n = MyNode() - outputs = n.eval_group(g) + node = MyNode() + outputs = node.eval_group(g) assert isinstance(outputs, list) assert len(outputs) == 2 assert isinstance(outputs[0], UnitsDataArray) @@ -65,69 +192,23 @@ def eval(self, coordinates, output=None): # invalid with pytest.raises(Exception): - n.eval_group(c1) + node.eval_group(c1) with pytest.raises(Exception): - n.eval(g) - - def test_units(self): - n = Node(units="meters") - - with pytest.raises(UndefinedUnitError): - Node(units="abc") - - def test_outputs(self): - n = Node() - assert n.outputs is None - - n = Node(outputs=["a", "b"]) - assert n.outputs == ["a", "b"] - - def test_outputs_and_output(self): - n = Node(outputs=["a", "b"]) - assert n.output is None + node.eval(g) - n = Node(outputs=["a", "b"], output="b") - assert n.output == "b" - - # must be one of the outputs - with pytest.raises(ValueError, match="Invalid output"): - n = Node(outputs=["a", "b"], output="other") - - # only valid for multiple-output nodes - with pytest.raises(TypeError, match="Invalid output"): - n = Node(output="other") - - -def TestNodeEval(self): - def test_extract_output(self): - coords = podpac.Coordinates([[0, 1, 2, 3], [0, 1]], dims=["lat", "lon"]) - - class MyNode1(Node): - @node_eval - def eval(self, coordinates, output=None): - return self.create_output_array(coordinates) - - # don't extract when no output field is requested - n = MyNode1() - out = n.eval(coords) - assert out.shape == (4, 2, 3) - - # do extract when an output field is requested - n = MyNode1(output="b") - out = n.eval(coords) - assert out.shape == (4, 2) + def test_eval_not_implemented(self): + node = Node() + with pytest.raises(NotImplementedError): + node.eval(None) - # should still work if the node has already extracted it - class MyNode2(Node): - @node_eval - def eval(self, coordinates, output=None): - out = self.create_output_array(coordinates) - return out.sel(output=self.output) + with pytest.raises(NotImplementedError): + node.eval(None, output=None) - n = MyNode2(output="b") - out = n.eval(coords) - assert out.shape == (4, 2) + def test_find_coordinates_not_implemented(self): + node = Node() + with pytest.raises(NotImplementedError): + node.find_coordinates() class TestCreateOutputArray(object): @@ -184,6 +265,41 @@ def test_create_output_array_crs(self): assert output.crs == crs +class TestNodeEval(object): + def test_extract_output(self): + coords = podpac.Coordinates([[0, 1, 2, 3], [0, 1]], dims=["lat", "lon"]) + + class MyNode1(Node): + outputs = ["a", "b", "c"] + + @node_eval + def eval(self, coordinates, output=None): + return self.create_output_array(coordinates) + + # don't extract when no output field is requested + node = MyNode1() + out = node.eval(coords) + assert out.shape == (4, 2, 3) + + # do extract when an 
output field is requested + node = MyNode1(output="b") + out = node.eval(coords) + assert out.shape == (4, 2) + + # should still work if the node has already extracted it + class MyNode2(Node): + outputs = ["a", "b", "c"] + + @node_eval + def eval(self, coordinates, output=None): + out = self.create_output_array(coordinates) + return out.sel(output=self.output) + + node = MyNode2(output="b") + out = node.eval(coords) + assert out.shape == (4, 2) + + class TestCaching(object): @classmethod def setup_class(cls): @@ -256,9 +372,10 @@ def test_put_overwrite(self): assert self.node.get_cache("test") == 0 with pytest.raises(NodeException): - self.node.put_cache(1, "test") + self.node.put_cache(1, "test", overwrite=False) + assert self.node.get_cache("test") == 0 - self.node.put_cache(1, "test", overwrite=True) + self.node.put_cache(1, "test") assert self.node.get_cache("test") == 1 def test_rem_all(self): @@ -328,249 +445,183 @@ def test_rem_key_coordinates(self): assert self.node.has_cache("c", coordinates=self.coords2) assert self.node.has_cache("d", coordinates=self.coords) - def test_cache_property_decorator(self): - class Test(podpac.Node): - a = tl.Int(1).tag(attr=True) - b = tl.Int(1).tag(attr=True) - c = tl.Int(1) - d = tl.Int(1) - - @podpac.core.node.cache_func("a2", "a") - def a2(self): - """a2 docstring""" - return self.a * 2 - - @podpac.core.node.cache_func("b2") - def b2(self): - """ b2 docstring """ - return self.b * 2 - - @podpac.core.node.cache_func("c2", "c") - def c2(self): - """ c2 docstring """ - return self.c * 2 - - @podpac.core.node.cache_func("d2") - def d2(self): - """ d2 docstring """ - return self.d * 2 - - t = Test(cache_ctrl=CacheCtrl([RamCacheStore()])) - t2 = Test(cache_ctrl=CacheCtrl([RamCacheStore()])) - t.rem_cache(key="*", coordinates="*") - t2.rem_cache(key="*", coordinates="*") + # node definition errors + # this demonstrates both classes of error in the has_cache case, but only one for put/get/rem + # we could test both classes for put/get/rem as well, but that is not really necessary + def test_has_cache_unavailable_circular(self): + class MyNode(Node): + a = tl.Any().tag(attr=True) - try: - t.get_cache("a2") - raise Exception("Cache should be cleared.") - except podpac.NodeException: - pass + @tl.default("a") + def _default_a(self): + return self.b - assert t.a2() == 2 - assert t.b2() == 2 - assert t.c2() == 2 - assert t.d2() == 2 - assert t2.a2() == 2 - assert t2.b2() == 2 - assert t2.c2() == 2 - assert t2.d2() == 2 - - t.set_trait("a", 2) - assert t.a2() == 4 - t.set_trait("b", 2) - assert t.b2() == 4 # This happens because the node definition changed - t.rem_cache(key="*", coordinates="*") - assert t.c2() == 2 # This forces the cache to update based on the new node definition - assert t.d2() == 2 # This forces the cache to update based on the new node definition - t.c = 2 - assert t.c2() == 4 # This happens because of depends - t.d = 2 - assert t.d2() == 2 # No depends, and doesn't have a tag - - # These should not change - assert t2.a2() == 2 - assert t2.b2() == 2 - assert t2.c2() == 2 - assert t2.d2() == 2 - - t2.set_trait("a", 2) - assert t2.get_cache("a2") == 4 # This was cached by t - t2.set_trait("b", 2) - assert t2.get_cache("c2") == 4 # This was cached by t - assert t2.get_cache("d2") == 2 # This was cached by t - - def test_cache_func_decorator_with_no_cache(self): - class Test(podpac.Node): - a = tl.Int(1).tag(attr=True) - b = tl.Int(1).tag(attr=True) - c = tl.Int(1) - d = tl.Int(1) - - @podpac.core.node.cache_func("a2", "a") - def a2(self): - 
"""a2 docstring""" - return self.a * 2 - - @podpac.core.node.cache_func("b2") - def b2(self): - """ b2 docstring """ - return self.b * 2 - - @podpac.core.node.cache_func("c2", "c") - def c2(self): - """ c2 docstring """ - return self.c * 2 - - @podpac.core.node.cache_func("d2") - def d2(self): - """ d2 docstring """ - return self.d * 2 - - t = Test(cache_ctrl=None) - t2 = Test(cache_ctrl=None) - t.rem_cache(key="*", coordinates="*") - t2.rem_cache(key="*", coordinates="*") + @property + def b(self): + self.has_cache("b") + return 10 - try: - t.get_cache("a2") - raise Exception("Cache should be cleared.") - except podpac.NodeException: - pass + node = MyNode(cache_ctrl=["ram"]) + with pytest.raises(NodeException, match="Cache unavailable, node definition has a circular dependency"): + assert node.b == 10 + + def test_has_cache_unavailable_uninitialized(self): + class MyNode(Node): + a = tl.Any().tag(attr=True) + + @tl.validate("a") + def _validate_a(self, d): + self.b + return d["value"] + + @property + def b(self): + self.has_cache("key") + return 10 + + with pytest.raises(NodeException, match="Cache unavailable, node is not yet fully initialized"): + node = MyNode(a=3, cache_ctrl=["ram"]) + + def test_put_cache_unavailable_uninitialized(self): + class MyNode(Node): + a = tl.Any().tag(attr=True) + + @tl.validate("a") + def _validate_a(self, d): + self.b + return d["value"] + + @property + def b(self): + self.put_cache(10, "key") + return 10 + + with pytest.raises(NodeException, match="Cache unavailable"): + node = MyNode(a=3, cache_ctrl=["ram"]) + + def test_get_cache_unavailable_uninitialized(self): + class MyNode(Node): + a = tl.Any().tag(attr=True) - assert t.a2() == 2 - assert t.b2() == 2 - assert t.c2() == 2 - assert t.d2() == 2 - assert t2.a2() == 2 - assert t2.b2() == 2 - assert t2.c2() == 2 - assert t2.d2() == 2 - - t.set_trait("a", 2) - assert t.a2() == 4 - t.set_trait("b", 2) - assert t.b2() == 4 # This happens because the node definition changed - t.rem_cache(key="*", coordinates="*") - assert t.c2() == 2 # This forces the cache to update based on the new node definition - assert t.d2() == 2 # This forces the cache to update based on the new node definition - t.c = 2 - assert t.c2() == 4 # This happens because of depends - t.d = 2 - assert t.d2() == 4 # No caching here, so it SHOULD update - - # These should not change - assert t2.a2() == 2 - assert t2.b2() == 2 - assert t2.c2() == 2 - assert t2.d2() == 2 + @tl.validate("a") + def _validate_a(self, d): + self.b + return d["value"] + + @property + def b(self): + self.get_cache("key") + return 10 + + with pytest.raises(NodeException, match="Cache unavailable"): + node = MyNode(a=3, cache_ctrl=["ram"]) + + def test_rem_cache_unavailable_uninitialized(self): + class MyNode(Node): + a = tl.Any().tag(attr=True) + + @tl.validate("a") + def _validate_a(self, d): + self.b + return d["value"] + + @property + def b(self): + self.rem_cache("key") + return 10 + + with pytest.raises(NodeException, match="Cache unavailable"): + node = MyNode(a=3, cache_ctrl=["ram"]) class TestSerialization(object): @classmethod def setup_class(cls): a = podpac.algorithm.Arange() - b = podpac.data.Array(source=[10, 20, 30], native_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"])) - c = podpac.compositor.OrderedCompositor(sources=np.array([a, b])) + b = podpac.data.Array(source=[10, 20, 30], coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"])) + c = podpac.compositor.OrderedCompositor(sources=[a, b]) with warnings.catch_warnings(): 
warnings.filterwarnings("ignore", "Insecure evaluation.*") cls.node = podpac.algorithm.Arithmetic(A=a, B=b, C=c, eqn="A + B + C") - cls.node_file_path = "node.json" - if os.path.exists(cls.node_file_path): - os.remove(cls.node_file_path) - - @classmethod - def teardown_class(cls): - if os.path.exists(cls.node_file_path): - os.remove(cls.node_file_path) - def test_base_ref(self): - n = Node() - assert isinstance(n.base_ref, str) + node = Node() + assert isinstance(node.base_ref, six.string_types) def test_base_definition(self): - class N(Node): + node = Node() + d = node._base_definition + assert "node" in d + assert isinstance(d["node"], six.string_types) + + def test_base_definition_attrs(self): + class MyNode(Node): my_attr = tl.Int().tag(attr=True) - my_node_attr = tl.Instance(Node).tag(attr=True) - a = Node() - node = N(my_attr=7, my_node_attr=a) + node = MyNode(my_attr=7) - d = node.base_definition - assert isinstance(d, OrderedDict) - assert "node" in d - assert isinstance(d["node"], str) - assert "attrs" in d - assert isinstance(d["attrs"], OrderedDict) - assert "my_attr" in d["attrs"] + d = node._base_definition assert d["attrs"]["my_attr"] == 7 - assert isinstance(d["lookup_attrs"], OrderedDict) - assert "my_node_attr" in d["lookup_attrs"] - assert d["lookup_attrs"]["my_node_attr"] is a - def test_base_definition_multiple_outputs(self): - n = Node() - d = n.base_definition - if "attrs" in d: - assert "outputs" not in d["attrs"] - assert "output" not in d["attrs"] + def test_base_definition_inputs(self): + class MyNode(Node): + my_attr = NodeTrait().tag(attr=True) - n = Node(outputs=["a", "b"]) - d = n.base_definition - assert "attrs" in d - assert "outputs" in d["attrs"] - assert "output" not in d["attrs"] + a = Node() + node = MyNode(my_attr=a) - n = Node(outputs=["a", "b"], output="b") - d = n.base_definition - assert "attrs" in d - assert "outputs" in d["attrs"] - assert "output" in d["attrs"] + d = node._base_definition + assert d["inputs"]["my_attr"] == a - def test_base_definition_units(self): - n = Node(units="meters") + def test_base_definition_inputs_array(self): + class MyNode(Node): + my_attr = ArrayTrait().tag(attr=True) - d = n.base_definition - assert "attrs" in d - assert isinstance(d["attrs"], OrderedDict) - assert "units" in d["attrs"] - assert d["attrs"]["units"] == "meters" + a = Node() + b = Node() + node = MyNode(my_attr=[a, b]) - n = Node() - d = n.base_definition - assert "units" not in d + d = node._base_definition + assert d["inputs"]["my_attr"][0] == a + assert d["inputs"]["my_attr"][1] == b - def test_base_definition_array_attr(self): - class N(Node): - my_attr = ArrayTrait().tag(attr=True) + def test_base_definition_inputs_dict(self): + class MyNode(Node): + my_attr = tl.Dict().tag(attr=True) - node = N(my_attr=np.ones((2, 3, 4))) - d = node.base_definition - my_attr = np.array(d["attrs"]["my_attr"]) - np.testing.assert_array_equal(my_attr, node.my_attr) + a = Node() + b = Node() + node = MyNode(my_attr={"a": a, "b": b}) - def test_base_definition_coordinates_attr(self): - class N(Node): - my_attr = tl.Instance(podpac.Coordinates).tag(attr=True) + d = node._base_definition + assert d["inputs"]["my_attr"]["a"] == a + assert d["inputs"]["my_attr"]["b"] == b - node = N(my_attr=podpac.Coordinates([[0, 1], [1, 2, 3]], dims=["lat", "lon"])) - d = node.base_definition - assert d["attrs"]["my_attr"] == node.my_attr + def test_base_definition_style(self): + node = Node(style=Style(name="test")) + d = node._base_definition + assert "style" in 
node._base_definition - def test_base_definition_unserializable(self): - class N(Node): - my_attr = tl.Instance(xr.DataArray).tag(attr=True) + def test_base_definition_remove_unnecessary_attrs(self): + node = Node(outputs=["a", "b"], output="a", units="m") + d = node._base_definition + assert "outputs" in d["attrs"] + assert "output" in d["attrs"] + assert "units" in d["attrs"] - node = N(my_attr=xr.DataArray([0, 1])) - with pytest.raises(NodeException, match="Cannot serialize attr 'my_attr'"): - node.base_definition + node = Node() + d = node._base_definition + if "attrs" in d: + assert "outputs" not in d["attrs"] + assert "output" not in d["attrs"] + assert "units" not in d["attrs"] def test_definition(self): # definition d = self.node.definition assert isinstance(d, OrderedDict) - assert len(d) == 4 + assert len(d) == 5 # from_definition with warnings.catch_warnings(): @@ -578,7 +629,7 @@ def test_definition(self): node = Node.from_definition(d) assert node is not self.node - assert node.hash == self.node.hash + assert node == self.node assert isinstance(node, podpac.algorithm.Arithmetic) assert isinstance(node.inputs["A"], podpac.algorithm.Arange) assert isinstance(node.inputs["B"], podpac.data.Array) @@ -588,49 +639,40 @@ def test_definition_duplicate_base_ref(self): n1 = Node(units="m") n2 = Node(units="ft") n3 = Node(units="in") - n = podpac.compositor.OrderedCompositor(sources=[n1, n2, n3]) - d = n.definition + node = podpac.compositor.OrderedCompositor(sources=[n1, n2, n3]) + d = node.definition assert n1.base_ref == n2.base_ref == n3.base_ref - assert len(d) == 4 - - def test_definition_lookup_attrs(self): - global MyNodeWithNodeAttr + assert len(d) == 5 - class MyNodeWithNodeAttr(Node): - my_node_attr = tl.Instance(Node).tag(attr=True) + def test_definition_inputs_array(self): + global MyNodeWithArrayInput - node = MyNodeWithNodeAttr(my_node_attr=podpac.algorithm.Arange()) - d = node.definition - assert isinstance(d, OrderedDict) - assert len(d) == 2 + class MyNodeWithArrayInput(Node): + my_array = ArrayTrait().tag(attr=True) - node2 = Node.from_definition(d) - assert node2 is not node - assert node2.hash == node.hash - assert isinstance(node2, MyNodeWithNodeAttr) - assert isinstance(node2.my_node_attr, podpac.algorithm.Arange) + node1 = MyNodeWithArrayInput(my_array=[podpac.algorithm.Arange()]) + node2 = Node.from_definition(node1.definition) + assert node2 is not node1 and node2 == node1 - def test_definition_lookup_source(self): - global MyNodeWithNodeSource + def test_definition_inputs_dict(self): + global MyNodeWithDictInput - class MyNodeWithNodeSource(podpac.data.DataSource): - source = tl.Instance(Node) + class MyNodeWithDictInput(Node): + my_dict = tl.Dict().tag(attr=True) - node = MyNodeWithNodeSource(source=podpac.algorithm.Arange()) - d = node.definition - assert isinstance(d, OrderedDict) - assert len(d) == 2 + node1 = MyNodeWithDictInput(my_dict={"a": podpac.algorithm.Arange()}) + node2 = Node.from_definition(node1.definition) + assert node2 is not node1 and node2 == node1 - node2 = Node.from_definition(d) - assert node2 is not node - assert node2.hash == node.hash - assert isinstance(node2, MyNodeWithNodeSource) - assert isinstance(node2.source, podpac.algorithm.Arange) + def test_definition_version(self): + d = self.node.definition + assert "podpac_version" in d + assert d["podpac_version"] == podpac.__version__ def test_json(self): # json s = self.node.json - assert isinstance(s, str) + assert isinstance(s, six.string_types) assert json.loads(s) # test from_json 
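As a brief sketch of the round trip these serialization tests exercise (using nodes that appear elsewhere in this suite):

import podpac
from podpac.core.node import Node

node = podpac.algorithm.Arange()
s = node.json                # JSON definition; now includes "podpac_version"
node2 = Node.from_json(s)    # rebuild an equivalent node from the definition
assert node2 == node         # equality compares definitions (style is ignored)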
@@ -638,33 +680,35 @@ def test_json(self): warnings.filterwarnings("ignore", "Insecure evaluation.*") node = Node.from_json(s) assert node is not self.node - assert node.hash == self.node.hash + assert node == self.node assert isinstance(node, podpac.algorithm.Arithmetic) assert isinstance(node.inputs["A"], podpac.algorithm.Arange) assert isinstance(node.inputs["B"], podpac.data.Array) assert isinstance(node.inputs["C"], podpac.compositor.OrderedCompositor) def test_file(self): - # save - self.node.save(self.node_file_path) + path = tempfile.mkdtemp(prefix="podpac-test-") + filename = os.path.join(path, "node.json") - assert os.path.exists(self.node_file_path) + # save + self.node.save(filename) + assert os.path.exists(filename) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Insecure evaluation.*") - node = Node.load(self.node_file_path) + node = Node.load(filename) assert node is not self.node - assert node.hash == self.node.hash + assert node == self.node assert isinstance(node, podpac.algorithm.Arithmetic) assert isinstance(node.inputs["A"], podpac.algorithm.Arange) assert isinstance(node.inputs["B"], podpac.data.Array) assert isinstance(node.inputs["C"], podpac.compositor.OrderedCompositor) def test_json_pretty(self): - n = Node() - s = n.json_pretty - assert isinstance(s, str) + node = Node() + s = node.json_pretty + assert isinstance(s, six.string_types) json.loads(s) def test_hash(self): @@ -683,6 +727,88 @@ class M(Node): assert n1.hash != n3.hash assert n1.hash != m1.hash + def test_hash_preserves_definition(self): + n = Node() + d_before = deepcopy(n.definition) + h = n.hash + d_after = deepcopy(n.definition) + + assert d_before == d_after + + def test_hash_omit_style(self): + class N(Node): + my_attr = tl.Int().tag(attr=True) + + n1 = N(my_attr=1, style=Style(name="a")) + n2 = N(my_attr=1, style=Style(name="b")) + + # json has style in it + assert n1.json != n2.json + + # but hash does not + assert n1.hash == n2.hash + + def test_hash_omit_version(self): + version = podpac.__version__ + + try: + # actual version + n1 = Node() + s1 = n1.json + h1 = n1.hash + + # spoof different version + podpac.__version__ = "other" + n2 = Node() + s2 = n2.json + h2 = n2.hash + + # JSON should be different, but hash should be the same + assert s1 != s2 + assert h1 == h2 + + finally: + # reset version + podpac.__version__ = version + + def test_eq(self): + class N(Node): + my_attr = tl.Int().tag(attr=True) + + class M(Node): + my_attr = tl.Int().tag(attr=True) + + n1 = N(my_attr=1) + n2 = N(my_attr=1) + n3 = N(my_attr=2) + m1 = M(my_attr=1) + + # eq + assert n1 == n2 + assert not n1 == n3 + assert not n1 == m1 + assert not n1 == "other" + + # ne + assert not n1 != n2 + assert n1 != n3 + assert n1 != m1 + assert n1 != "other" + + def test_eq_ignore_style(self): + class N(Node): + my_attr = tl.Int().tag(attr=True) + + n1 = N(my_attr=1, style=Style(name="a")) + n2 = N(my_attr=1, style=Style(name="b")) + + # json has style in it + assert n1.json != n2.json + + # but == and != don't care + assert n1 == n2 + assert not n1 != n2 + def test_from_url(self): url = ( r"http://testwms/?map=map&&service={service}&request=GetMap&{layername}={layer}&styles=&format=image%2Fpng" @@ -704,16 +830,10 @@ def test_from_url(self): ): pipe = Node.from_url(url.format(service=service, layername=layername, layer=layer, params=param)) - def test_pipeline(self): - n = Node() - with pytest.warns(DeprecationWarning): - p = n.pipeline - assert isinstance(p, podpac.pipeline.Pipeline) - def test_style(self): 
node = podpac.data.Array( source=[10, 20, 30], - native_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]), + coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]), style=Style(name="test", units="m"), ) @@ -729,10 +849,24 @@ def test_style(self): assert node2.style.units == "m" # default style - node = podpac.data.Array(source=[10, 20, 30], native_coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"])) + node = podpac.data.Array(source=[10, 20, 30], coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"])) d = node.definition assert "style" not in d[node.base_ref] + def test_circular_definition(self): + # this is admittedly a contrived example in order to demonstrate the most direct case + class MyNode(Node): + a = tl.Any().tag(attr=True) + + @tl.default("a") + def _default_a(self): + self.definition + return 10 + + node = MyNode() + with pytest.raises(NodeDefinitionError, match="node definition has a circular dependency"): + node.a + class TestUserDefinition(object): def test_empty(self): @@ -756,476 +890,69 @@ def test_invalid_node(self): with pytest.raises(ValueError, match="class 'Nonexistent' not found in module"): Node.from_json(s) - def test_datasource_source(self): - # basic - s = """ - { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.DataSource) - assert node.source == "my_data_string" - - # not required - s = """ - { - "mydata": { - "node": "data.DataSource" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.DataSource) - - # incorrect - s = """ - { - "mydata": { - "node": "data.DataSource", - "attrs": { - "source": "my_data_string" - } - } - } - """ - - with pytest.raises(ValueError, match="DataSource 'attrs' cannot have a 'source' property"): - node = Node.from_json(s) - - def test_datasource_lookup_source(self): - # sub-node - s = """ - { - "a": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "b": { - "node": "data.DataSource", - "lookup_source": "a.source" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.DataSource) - assert node.source == "my_data_string" - - # nonexistent node - s = """ - { - "a": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "b": { - "node": "data.DataSource", - "lookup_source": "nonexistent.source" - } - } - """ - - with pytest.raises(ValueError, match="reference to nonexistent node/attribute"): - Node.from_json(s) - - # nonexistent subattr - s = """ - { - "a": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "b": { - "node": "data.DataSource", - "lookup_source": "a.nonexistent" - } - } - """ - - with pytest.raises(ValueError, match="reference to nonexistent node/attribute"): - Node.from_json(s) - - # nonexistent subsubattr + def test_inputs(self): + # invalid type s = """ { "a": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "b": { - "node": "data.DataSource", - "lookup_source": "a.source.nonexistent" - } - } - """ - - with pytest.raises(ValueError, match="reference to nonexistent node/attribute"): - Node.from_json(s) - - # in attrs (incorrect) - s = """ - { - "mydata": { - "node": "data.DataSource", - "attrs": { - "lookup_source": "my_data_string" - } + "node": "algorithm.Min", + "inputs": { "source": 10 } } } """ - with pytest.raises(ValueError, match="DataSource 'attrs' cannot have a 'lookup_source' property"): + with pytest.raises(ValueError, match="Invalid definition for node"): 
Node.from_json(s) - def test_reprojected_source_lookup_source(self): - # NOTE: nonexistent node/attribute references are tested in test_datasource_lookup_source - - # lookup_source - s = """ - { - "mysource": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "reprojected": { - "node": "data.ReprojectedSource", - "lookup_source": "mysource" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.ReprojectedSource) - assert isinstance(node.source, podpac.data.DataSource) - assert node.source.source == "my_data_string" - - # lookup_source subattr - s = """ - { - "mysource": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "mean": { - "node": "algorithm.Mean", - "inputs": {"source": "mysource"} - }, - "reprojected": { - "node": "data.ReprojectedSource", - "lookup_source": "mean.source" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.ReprojectedSource) - assert isinstance(node.source, podpac.data.DataSource) - assert node.source.source == "my_data_string" - - # 'source' should fail - s = """ - { - "mysource": { - "node": "data.DataSource", - "source": "my_data_string" - }, - "reprojected": { - "node": "data.ReprojectedSource", - "source": "mysource" - } - } - """ - - with pytest.raises(tl.TraitError): - Node.from_json(s) - - def test_array_source(self): - s = """ - { - "mysource": { - "node": "data.Array", - "source": [0, 1, 2] - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.Array) - np.testing.assert_array_equal(node.source, [0, 1, 2]) - - def test_array_lookup_source(self): - s = """ - { - "a": { - "node": "data.Array", - "source": [0, 1, 2] - }, - "b": { - "node": "data.Array", - "lookup_source": "a.source" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.Array) - np.testing.assert_array_equal(node.source, [0, 1, 2]) - - # 'source' should fail + # nonexistent node s = """ { "a": { - "node": "data.Array", - "source": [0, 1, 2] - }, - "b": { - "node": "data.Array", - "source": "a.source" + "node": "algorithm.Min", + "inputs": { "source": "nonexistent" } } } """ - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Invalid definition for node"): Node.from_json(s) - def test_algorithm_inputs(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Insecure evaluation.*") - # NOTE: nonexistent node/attribute references are tested in test_datasource_lookup_source - - # basic - s = """ - { - "source1": {"node": "algorithm.Arange"}, - "source2": {"node": "algorithm.CoordData"}, - "result": { - "node": "algorithm.Arithmetic", - "inputs": { - "A": "source1", - "B": "source2" - }, - "attrs": { - "eqn": "A + B" - } - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.algorithm.Arithmetic) - assert isinstance(node.inputs["A"], podpac.algorithm.Arange) - assert isinstance(node.inputs["B"], podpac.algorithm.CoordData) - - # sub-node - s = """ - { - "mysource": {"node": "algorithm.Arange"}, - "mean": { - "node": "algorithm.Mean", - "inputs": { "source": "mysource" } - }, - "double": { - "node": "algorithm.Arithmetic", - "inputs": { "A": "mean.source" }, - "attrs": { "eqn": "2 * A" } - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.algorithm.Arithmetic) - assert isinstance(node.inputs["A"], podpac.algorithm.Arange) - - # in attrs (incorrect) - s = """ - { - "source1": {"node": "algorithm.Arange"}, - "source2": {"node": "algorithm.CoordData"}, - "result": { - 
"node": "algorithm.Arithmetic", - "attrs": { - "inputs": { - "A": "source1", - "B": "source2" - }, - "eqn": "A + B" - } - } - } - """ - - with pytest.raises(ValueError, match="Algorithm 'attrs' cannot have an 'inputs' property"): - Node.from_json(s) - - def test_compositor_sources(self): - # NOTE: nonexistent node/attribute references are tested in test_datasource_lookup_source - - # basic - s = """ - { - "a": {"node": "algorithm.Arange"}, - "b": {"node": "algorithm.CoordData"}, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"] - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.compositor.OrderedCompositor) - assert isinstance(node.sources[0], podpac.algorithm.Arange) - assert isinstance(node.sources[1], podpac.algorithm.CoordData) - - # sub-node - s = """ - { - "source1": {"node": "algorithm.Arange"}, - "mean1": { - "node": "algorithm.Mean", - "inputs": {"source": "source1"} - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["mean1.source", "source1"] - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.compositor.OrderedCompositor) - assert isinstance(node.sources[0], podpac.algorithm.Arange) - assert isinstance(node.sources[1], podpac.algorithm.Arange) - - def test_datasource_interpolation(self): - s = """ - { - "mydata": { - "node": "data.DataSource", - "source": "my_data_string", - "interpolation": "nearest" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.DataSource) - assert node.interpolation == "nearest" - - # not required - s = """ - { - "mydata": { - "node": "data.DataSource" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.data.DataSource) - - # incorrect - s = """ - { - "mydata": { - "node": "data.DataSource", - "attrs": { - "interpolation": "nearest" - } - } - } - """ - - with pytest.raises(ValueError, match="DataSource 'attrs' cannot have an 'interpolation' property"): - Node.from_json(s) - - def test_compositor_interpolation(self): - s = """ - { - "a": { - "node": "algorithm.Arange" - }, - "b": { - "node": "algorithm.Arange" - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"], - "interpolation": "nearest" - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.compositor.OrderedCompositor) - assert node.interpolation == "nearest" - - # not required + def test_lookup_attrs(self): s = """ { "a": { - "node": "algorithm.Arange" + "node": "algorithm.CoordData", + "attrs": { "coord_name": "lat" } }, "b": { - "node": "algorithm.Arange" - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"] + "node": "algorithm.CoordData", + "lookup_attrs": { "coord_name": "a.coord_name" } } } """ node = Node.from_json(s) - assert isinstance(node, podpac.compositor.OrderedCompositor) + assert isinstance(node, podpac.algorithm.CoordData) + assert node.coord_name == "lat" - # incorrect + # invalid type s = """ { "a": { - "node": "algorithm.Arange" + "node": "algorithm.CoordData", + "attrs": { "coord_name": "lat" } }, "b": { - "node": "algorithm.Arange" - }, - "c": { - "node": "compositor.OrderedCompositor", - "sources": ["a", "b"], - "attrs": { - "interpolation": "nearest" - } + "node": "algorithm.CoordData", + "lookup_attrs": { "coord_name": 10 } } } """ - with pytest.raises(ValueError, match="Compositor 'attrs' cannot have an 'interpolation' property"): + with pytest.raises(ValueError, match="Invalid definition for node"): Node.from_json(s) - def test_attrs(self): - s = """ - { 
- "sm": { - "node": "datalib.smap.SMAP", - "attrs": { - "product": "SPL4SMGP" - } - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, podpac.datalib.smap.SMAP) - assert node.product == "SPL4SMGP" - - def test_lookup_attrs(self): - # NOTE: nonexistent node/attribute references are tested in test_datasource_lookup_source - + # nonexistent node s = """ { "a": { @@ -1234,41 +961,15 @@ def test_lookup_attrs(self): }, "b": { "node": "algorithm.CoordData", - "lookup_attrs": { "coord_name": "a.coord_name" } + "lookup_attrs": { "coord_name": "nonexistent.coord_name" } } } """ - node = Node.from_json(s) - assert isinstance(node, podpac.algorithm.CoordData) - assert node.coord_name == "lat" - - # lookup node directly (instead of a sub-attr) - global MyNodeWithNodeAttr - - class MyNodeWithNodeAttr(Node): - my_node_attr = tl.Instance(Node).tag(attr=True) - - s = """ - { - "mysource": { - "node": "data.DataSource" - }, - "mynode": { - "plugin": "test_node", - "node": "MyNodeWithNodeAttr", - "lookup_attrs": { - "my_node_attr": "mysource" - } - } - } - """ - - node = Node.from_json(s) - assert isinstance(node, MyNodeWithNodeAttr) - assert isinstance(node.my_node_attr, podpac.data.DataSource) + with pytest.raises(ValueError, match="Invalid definition for node"): + Node.from_json(s) - # attrs should not work + # nonexistent subattr s = """ { "a": { @@ -1277,13 +978,13 @@ class MyNodeWithNodeAttr(Node): }, "b": { "node": "algorithm.CoordData", - "attrs": { "coord_name": "a.coord_name" } + "lookup_attrs": { "coord_name": "a.nonexistent" } } } """ - node = Node.from_json(s) - assert node.coord_name == "a.coord_name" # this will fail at evaluation + with pytest.raises(ValueError, match="Invalid definition for node"): + Node.from_json(s) def test_invalid_property(self): s = """ @@ -1337,18 +1038,18 @@ def test_debuggable(self): }, "mean": { "node": "algorithm.SpatialConvolution", - "inputs": {"source": "a"}, + "lookup_attrs": {"source": "a"}, "attrs": {"kernel_type": "mean,3"} }, "c": { "node": "algorithm.Arithmetic", - "inputs": {"A": "a", "B": "mean"}, + "lookup_attrs": {"A": "a", "B": "mean"}, "attrs": {"eqn": "a-b"} } } """ - with warnings.catch_warnings(): + with warnings.catch_warnings(), podpac.settings: warnings.filterwarnings("ignore", "Insecure evaluation.*") # normally node objects can and should be re-used @@ -1361,6 +1062,56 @@ def test_debuggable(self): node = Node.from_json(s) assert node.inputs["A"] is not node.inputs["B"].source + def test_from_definition_version_warning(self): + s = """ + { + "a": { + "node": "algorithm.Arange" + }, + "podpac_version": "other" + } + """ + + with pytest.warns(UserWarning, match="node definition version mismatch"): + node = Node.from_json(s) + + +class TestNoCacheMixin(object): + class NoCacheNode(NoCacheMixin, Node): + pass + + def test_default_no_cache(self): + with podpac.settings: + podpac.settings["DEFAULT_CACHE"] = ["ram"] + node = self.NoCacheNode() + assert len(node.cache_ctrl._cache_stores) == 0 + + def test_customizable(self): + podpac.settings["DEFAULT_CACHE"] = ["ram"] + node = self.NoCacheNode(cache_ctrl=["ram"]) + assert len(node.cache_ctrl._cache_stores) == 1 + + +class TestDiskCacheMixin(object): + class DiskCacheNode(DiskCacheMixin, Node): + pass + + def test_default_disk_cache(self): + with podpac.settings: + # add disk cache + podpac.settings["DEFAULT_CACHE"] = ["ram"] + node = self.DiskCacheNode() + assert len(node.cache_ctrl._cache_stores) == 2 + + # don't add if it is already there + podpac.settings["DEFAULT_CACHE"] = 
["ram", "disk"] + node = self.DiskCacheNode() + assert len(node.cache_ctrl._cache_stores) == 2 + + def test_customizable(self): + node = self.DiskCacheNode(cache_ctrl=["ram"]) + assert len(node.cache_ctrl._cache_stores) == 1 + # TODO: remove this - this is currently a placeholder test until we actually have integration tests (pytest will exit with code 5 if no tests found) @pytest.mark.integration diff --git a/podpac/core/test/test_settings.py b/podpac/core/test/test_settings.py index faf445c29..cf5e818fa 100644 --- a/podpac/core/test/test_settings.py +++ b/podpac/core/test/test_settings.py @@ -27,8 +27,8 @@ def teardown_method(self): def test_settings_file_defaults_to_home_dir(self): self.make_settings_tmp_dir() # so teardown method has something ot tear down settings = PodpacSettings() - path = os.path.expanduser("~") - assert settings.settings_path == os.path.join(path, ".podpac", "settings.json") + path = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~")) + assert settings.settings_path == os.path.join(path, ".config", "podpac", "settings.json") def test_single_saved_setting_persists(self): path = self.make_settings_tmp_dir() @@ -73,5 +73,5 @@ def test_misconfigured_settings_file_fall_back_on_default(self): settings.load(path=path) assert isinstance(settings, dict) - path = os.path.expanduser("~") - assert settings.settings_path == os.path.join(path, ".podpac", "settings.json") + path = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~")) + assert settings.settings_path == os.path.join(path, ".config", "podpac", "settings.json") diff --git a/podpac/core/test/test_units.py b/podpac/core/test/test_units.py index a73f5e72e..81b2f57ca 100644 --- a/podpac/core/test/test_units.py +++ b/podpac/core/test/test_units.py @@ -1,6 +1,7 @@ from __future__ import division, unicode_literals, print_function, absolute_import import io +import tempfile import pytest import numpy as np @@ -13,7 +14,6 @@ from podpac.core.units import ureg from podpac.core.units import UnitsDataArray from podpac.core.units import to_image -from podpac.core.units import create_dataarray # DEPRECATED from podpac.data import Array, Rasterio @@ -445,10 +445,6 @@ def test_invalid_coords(self): with pytest.raises(TypeError): UnitsDataArray.create((3, 4)) - def test_deprecate_create_dataarray(self): - with pytest.deprecated_call(): - create_dataarray(self.coords, data=10) - class TestOpenDataArray(object): def test_open_after_create(self): @@ -482,8 +478,8 @@ def test_open_after_eval(self): lat = np.linspace(-10, 10, 5) lon = np.linspace(-10, 10, 5) native_coords = Coordinates([lat, lon], ["lat", "lon"]) - node = Array(source=data, native_coordinates=native_coords) - uda = node.eval(node.native_coordinates) + node = Array(source=data, coordinates=native_coords) + uda = node.eval(node.coordinates) ncdf = uda.to_netcdf() uda_2 = UnitsDataArray.open(ncdf) @@ -517,7 +513,7 @@ def make_square_array(self, order=1, bands=1): # bands = 3 node = Array( source=np.arange(8 * bands).reshape(3 - order, 3 + order, bands), - native_coordinates=Coordinates([clinspace(4, 0, 2, "lat"), clinspace(1, 4, 4, "lon")][::order]), + coordinates=Coordinates([clinspace(4, 0, 2, "lat"), clinspace(1, 4, 4, "lon")][::order]), outputs=[str(s) for s in list(range(bands))], ) return node @@ -531,7 +527,7 @@ def make_rot_array(self, order=1, bands=1): c = Coordinates([rc]) node = Array( source=np.arange(8 * bands).reshape(3 - order, 3 + order, bands), - native_coordinates=c, + coordinates=c, outputs=[str(s) for s in list(range(bands))], ) return node @@ 
-539,105 +535,105 @@ def make_rot_array(self, order=1, bands=1): def test_to_geotiff_rountrip_1band(self): # lat/lon order, usual node = self.make_square_array() - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs, mode="r") + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison - assert node.native_coordinates == rnode.native_coordinates + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs) + assert rnode.coordinates == node.coordinates - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(rout.data, out.data) - # lon/lat order, unsual + # lon/lat order, unusual node = self.make_square_array(order=-1) - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs) + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison - assert node.native_coordinates == rnode.native_coordinates + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs) + assert rnode.coordinates == node.coordinates - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(rout.data, out.data) def test_to_geotiff_rountrip_2band(self): # lat/lon order, usual node = self.make_square_array(bands=2) - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs, mode="r") + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison - assert node.native_coordinates == rnode.native_coordinates + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs) + assert rnode.coordinates == node.coordinates - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(rout.data, out.data) # lon/lat order, unsual node = self.make_square_array(order=-1, bands=2) - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs) - - assert node.native_coordinates == rnode.native_coordinates - - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) - - # Check single output - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs, output=node.outputs[1]) - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data[..., 1], rout.data) - - # Check single band 1 - fp.seek(0) - rnode = Rasterio(source=fp, band=1) - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data[..., 0], rout.data) - - # Check single band 2 - fp.seek(0) - rnode = 
Rasterio(source=fp, band=2) - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data[..., 1], rout.data) + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison + + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs) + assert rnode.coordinates == node.coordinates + + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(rout.data, out.data) + + # Check single output + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs, output=node.outputs[1]) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(out.data[..., 1], rout.data) + + # Check single band 1 + fp.seek(0) + rnode = Rasterio(source=fp.name, band=1) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(out.data[..., 0], rout.data) + + # Check single band 2 + fp.seek(0) + rnode = Rasterio(source=fp.name, band=2) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(out.data[..., 1], rout.data) @pytest.mark.skip("TODO: We can remove this skipped test after solving #363") def test_to_geotiff_rountrip_rotcoords(self): # lat/lon order, usual node = self.make_rot_array() - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs, mode="r") + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison - assert node.native_coordinates == rnode.native_coordinates + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs, mode="r") + assert node.coordinates == rnode.coordinates - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(out.data, rout.data) # lon/lat order, unsual node = self.make_square_array(order=-1) - out = node.eval(node.native_coordinates) - fp = io.BytesIO() - out.to_geotiff(fp) - fp.write(b"a") # for some reason needed to get good comparison - fp.seek(0) - rnode = Rasterio(source=fp, outputs=node.outputs) + out = node.eval(node.coordinates) + with tempfile.NamedTemporaryFile("wb") as fp: + out.to_geotiff(fp) + fp.write(b"a") # for some reason needed to get good comparison - assert node.native_coordinates == rnode.native_coordinates + fp.seek(0) + rnode = Rasterio(source=fp.name, outputs=node.outputs) + assert node.coordinates == rnode.coordinates - rout = rnode.eval(rnode.native_coordinates) - np.testing.assert_almost_equal(out.data, rout.data) + rout = rnode.eval(rnode.coordinates) + np.testing.assert_almost_equal(out.data, rout.data) diff --git a/podpac/core/test/test_utils.py b/podpac/core/test/test_utils.py index 54a181875..686dfda00 100644 --- a/podpac/core/test/test_utils.py +++ b/podpac/core/test/test_utils.py @@ -1,33 +1,71 @@ from __future__ import division, unicode_literals, print_function, absolute_import import os +import sys +import json +import datetime +import warnings from collections import OrderedDict import pytest import numpy as np +import pandas as pd +import xarray as xr import traitlets as tl -import sys -import podpac.core.utils as ut + +import podpac +from podpac.core.utils import common_doc +from podpac.core.utils import trait_is_defined +from podpac.core.utils import 
create_logfile +from podpac.core.utils import OrderedDictTrait, ArrayTrait, TupleTrait, NodeTrait +from podpac.core.utils import JSONEncoder, is_json_serializable +from podpac.core.utils import cached_property +from podpac.core.utils import ind2slice class TestCommonDocs(object): def test_common_docs_does_not_affect_anonymous_functions(self): f = lambda x: x - f2 = ut.common_doc({"key": "value"})(f) + f2 = common_doc({"key": "value"})(f) assert f(42) == f2(42) assert f.__doc__ is None -# TODO: add log testing -class TestLog(object): - def test_create_log(self): - pass +class TestTraitletsHelpers(object): + def test_trait_is_defined(self): + class MyClass(tl.HasTraits): + a = tl.Any() + b = tl.Any(default_value=0) + c = tl.Any() + + @tl.default("c") + def _default_b(self): + return "test" + + x = MyClass(a=1, b=1, c=1) + assert trait_is_defined(x, "a") + assert trait_is_defined(x, "b") + assert trait_is_defined(x, "c") + assert not trait_is_defined(x, "other") + + x = MyClass() + assert trait_is_defined(x, "a") + assert trait_is_defined(x, "b") + assert not trait_is_defined(x, "c") + + x.c + assert trait_is_defined(x, "c") + + +class TestLoggingHelpers(object): + def test_create_logfile(self): + create_logfile() class TestOrderedDictTrait(object): def test(self): class MyClass(tl.HasTraits): - d = ut.OrderedDictTrait() + d = OrderedDictTrait() m = MyClass(d=OrderedDict([("a", 1)])) @@ -37,14 +75,14 @@ class MyClass(tl.HasTraits): @pytest.mark.skipif(sys.version < "3.6", reason="python < 3.6") def test_dict_python36(self): class MyClass(tl.HasTraits): - d = ut.OrderedDictTrait() + d = OrderedDictTrait() m = MyClass(d={"a": 1}) @pytest.mark.skipif(sys.version >= "3.6", reason="python >= 3.6") def test_dict_python2(self): class MyClass(tl.HasTraits): - d = ut.OrderedDictTrait() + d = OrderedDictTrait() with pytest.raises(tl.TraitError): m = MyClass(d={"a": 1}) @@ -56,7 +94,7 @@ class MyClass(tl.HasTraits): class TestArrayTrait(object): def test(self): class MyClass(tl.HasTraits): - a = ut.ArrayTrait() + a = ArrayTrait() # basic usage o = MyClass(a=np.array([0, 4])) @@ -68,14 +106,9 @@ class MyClass(tl.HasTraits): assert isinstance(o.a, np.ndarray) np.testing.assert_equal(o.a, [0, 4]) - # invalid - # As of numpy 0.16, no longer raises an error - # with pytest.raises(tl.TraitError): - # MyClass(a=[0, [4, 5]]) - def test_ndim(self): class MyClass(tl.HasTraits): - a = ut.ArrayTrait(ndim=2) + a = ArrayTrait(ndim=2) MyClass(a=np.array([[0, 4]])) MyClass(a=[[0, 4]]) @@ -86,7 +119,7 @@ class MyClass(tl.HasTraits): def test_shape(self): class MyClass(tl.HasTraits): - a = ut.ArrayTrait(shape=(2, 2)) + a = ArrayTrait(shape=(2, 2)) MyClass(a=np.array([[0, 1], [2, 3]])) MyClass(a=[[0, 1], [2, 3]]) @@ -97,7 +130,7 @@ class MyClass(tl.HasTraits): def test_dtype(self): class MyClass(tl.HasTraits): - a = ut.ArrayTrait(dtype=float) + a = ArrayTrait(dtype=float) m = MyClass(a=np.array([0.0, 1.0])) assert m.a.dtype == float @@ -115,15 +148,275 @@ class MyClass(tl.HasTraits): def test_args(self): # shape and ndim must match - t = ut.ArrayTrait(ndim=2, shape=(2, 2)) + t = ArrayTrait(ndim=2, shape=(2, 2)) with pytest.raises(ValueError): - ut.ArrayTrait(ndim=1, shape=(2, 2)) + ArrayTrait(ndim=1, shape=(2, 2)) # dtype lookup - t = ut.ArrayTrait(dtype="datetime64") + t = ArrayTrait(dtype="datetime64") assert t.dtype == np.datetime64 # invalid dtype with pytest.raises(ValueError): - ut.ArrayTrait(dtype="notatype") + ArrayTrait(dtype="notatype") + + +class TestNodeTrait(object): + def test(self): + class 
MyClass(tl.HasTraits): + node = NodeTrait() + + t = MyClass(node=podpac.Node()) + + with pytest.raises(tl.TraitError): + MyClass(node=0) + + def test_debug(self): + class MyClass(tl.HasTraits): + node = NodeTrait() + + node = podpac.Node() + + with podpac.settings: + podpac.settings["DEBUG"] = False + t = MyClass(node=node) + assert t.node is node + + podpac.settings["DEBUG"] = True + t = MyClass(node=node) + assert t.node is not node + + +class TestTupleTrait(object): + def test_trait(self): + class MyClass(tl.HasTraits): + t = TupleTrait(trait=tl.Int()) + + MyClass(t=(1, 2, 3)) + + with pytest.raises(tl.TraitError): + MyClass(t=("a", "b", "c")) + + def test_tuple(self): + class MyClass(tl.HasTraits): + t = TupleTrait(trait=tl.Int()) + + a = MyClass(t=(1, 2, 3)) + assert isinstance(a.t, tuple) + + a = MyClass(t=[1, 2, 3]) + assert isinstance(a.t, tuple) + + +class TestJSONEncoder(object): + def test_coordinates(self): + coordinates = podpac.coordinates.Coordinates([0], dims=["time"]) + json.dumps(coordinates, cls=JSONEncoder) + + def test_node(self): + node = podpac.Node() + json.dumps(node, cls=JSONEncoder) + + def test_style(self): + style = podpac.core.style.Style() + json.dumps(style, cls=JSONEncoder) + + def test_interpolation(self): + interpolation = podpac.data.Interpolation() + json.dumps(interpolation, cls=JSONEncoder) + + def test_interpolator(self): + kls = podpac.data.INTERPOLATORS[0] + json.dumps(kls, cls=JSONEncoder) + + def test_units(self): + units = podpac.core.units.ureg.Unit("meters") + json.dumps(units, cls=JSONEncoder) + + def test_datetime64(self): + dt = np.datetime64() + json.dumps(dt, cls=JSONEncoder) + + def test_timedelta64(self): + td = np.timedelta64() + json.dumps(td, cls=JSONEncoder) + + def test_datetime(self): + now = datetime.datetime.now() + json.dumps(now, cls=JSONEncoder) + + def test_date(self): + today = datetime.date.today() + json.dumps(today, cls=JSONEncoder) + + def test_dataframe(self): + df = pd.DataFrame() + json.dumps(df, cls=JSONEncoder) + + def test_array_datetime64(self): + a = np.array(["2018-01-01", "2018-01-02"]).astype(np.datetime64) + json.dumps(a, cls=JSONEncoder) + + def test_array_timedelta64(self): + a = np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]) + json.dumps(a, cls=JSONEncoder) + + def test_array_numerical(self): + a = np.array([0.0, 1.0, 2.0]) + json.dumps(a, cls=JSONEncoder) + + def test_array_node(self): + a = np.array([podpac.Node(), podpac.Node()]) + json.dumps(a, cls=JSONEncoder) + + def test_array_unserializable(self): + class MyClass(object): + pass + + a = np.array([MyClass()]) + with pytest.raises(TypeError, match="Cannot serialize numpy array"): + json.dumps(a, cls=JSONEncoder) + + def test_unserializable(self): + value = xr.DataArray([]) + with pytest.raises(TypeError, match="not JSON serializable"): + json.dumps(value, cls=JSONEncoder) + + def test_is_json_serializable(self): + assert is_json_serializable("test") + assert not is_json_serializable(xr.DataArray([])) + + +class TestCachedPropertyDecorator(object): + def test_cached_property(self): + class MyNode(podpac.Node): + my_property_called = 0 + my_cached_property_called = 0 + my_cache_ctrl_property_called = 0 + + @property + def my_property(self): + self.my_property_called += 1 + return 10 + + @cached_property + def my_cached_property(self): + self.my_cached_property_called += 1 + return 20 + + @cached_property(use_cache_ctrl=True) + def my_cache_ctrl_property(self): + self.my_cache_ctrl_property_called += 1 + return 30 + + a = 
MyNode(cache_ctrl=["ram"]) + b = MyNode(cache_ctrl=["ram"]) + c = MyNode(cache_ctrl=[]) + + a.rem_cache(key="*") + b.rem_cache(key="*") + c.rem_cache(key="*") + + # normal property should be called every time + assert a.my_property_called == 0 + assert a.my_property == 10 + assert a.my_property_called == 1 + assert a.my_property == 10 + assert a.my_property == 10 + assert a.my_property_called == 3 + + assert b.my_property_called == 0 + assert b.my_property == 10 + assert b.my_property_called == 1 + assert b.my_property == 10 + assert b.my_property == 10 + assert b.my_property_called == 3 + + assert c.my_property_called == 0 + assert c.my_property == 10 + assert c.my_property_called == 1 + assert c.my_property == 10 + assert c.my_property == 10 + assert c.my_property_called == 3 + + # cached property should only be called when it is accessed + assert a.my_cached_property_called == 0 + assert a.my_cached_property == 20 + assert a.my_cached_property_called == 1 + assert a.my_cached_property == 20 + assert a.my_cached_property == 20 + assert a.my_cached_property_called == 1 + + assert b.my_cached_property_called == 0 + assert b.my_cached_property == 20 + assert b.my_cached_property_called == 1 + assert b.my_cached_property == 20 + assert b.my_cached_property == 20 + assert b.my_cached_property_called == 1 + + assert c.my_cached_property_called == 0 + assert c.my_cached_property == 20 + assert c.my_cached_property_called == 1 + assert c.my_cached_property == 20 + assert c.my_cached_property == 20 + assert c.my_cached_property_called == 1 + + # cache_ctrl cached property should only be called in the first node that accessses it + assert a.my_cache_ctrl_property_called == 0 + assert a.my_cache_ctrl_property == 30 + assert a.my_cache_ctrl_property_called == 1 + assert a.my_cache_ctrl_property == 30 + assert a.my_cache_ctrl_property == 30 + assert a.my_cache_ctrl_property_called == 1 + + assert b.my_cache_ctrl_property_called == 0 + assert b.my_cache_ctrl_property == 30 + assert b.my_cache_ctrl_property_called == 0 + assert b.my_cache_ctrl_property == 30 + assert b.my_cache_ctrl_property == 30 + assert b.my_cache_ctrl_property_called == 0 + + # but only if a cache_ctrl exists for the Node + assert c.my_cache_ctrl_property_called == 0 + assert c.my_cache_ctrl_property == 30 + assert c.my_cache_ctrl_property_called == 1 + assert c.my_cache_ctrl_property == 30 + assert c.my_cache_ctrl_property == 30 + assert c.my_cache_ctrl_property_called == 1 + + def test_invalid_argument(self): + with pytest.raises(TypeError, match="cached_property decorator does not accept keyword argument"): + cached_property(other=True) + + with pytest.raises(TypeError, match="cached_property decorator does not accept any positional arguments"): + cached_property(True) + + +class TestInd2Slice(object): + def test_slice(self): + assert ind2slice((slice(1, 4),)) == (slice(1, 4),) + + def test_integer(self): + assert ind2slice((1,)) == (1,) + + def test_integer_array(self): + assert ind2slice(([1, 2, 4],)) == (slice(1, 5),) + + def test_boolean_array(self): + assert ind2slice(([False, True, True, False, True, False],)) == (slice(1, 5),) + + def test_stepped(self): + assert ind2slice(([1, 3, 5],)) == (slice(1, 7, 2),) + assert ind2slice(([False, True, False, True, False, True],)) == (slice(1, 7, 2),) + + def test_multiindex(self): + I = (slice(1, 4), 1, [1, 2, 4], [False, True, False], [1, 3, 5]) + assert ind2slice(I) == (slice(1, 4), 1, slice(1, 5), 1, slice(1, 7, 2)) + + def test_nontuple(self): + assert ind2slice(slice(1, 4)) == 
slice(1, 4) + assert ind2slice(1) == 1 + assert ind2slice([1, 2, 4]) == slice(1, 5) + assert ind2slice([False, True, True, False, True, False]) == slice(1, 5) + assert ind2slice([1, 3, 5]) == slice(1, 7, 2) diff --git a/podpac/core/units.py b/podpac/core/units.py index 7f5ff3712..e2242d919 100644 --- a/podpac/core/units.py +++ b/podpac/core/units.py @@ -185,14 +185,27 @@ def to_format(self, format, *args, **kwargs): elif format in ["pickle", "pkl"]: r = cPickle.dumps(self) elif format == "zarr_part": - if part in kwargs: - part = [slice(*sss) for sss in kwargs.pop("part")] + from podpac.core.data.zarr_source import Zarr + import zarr + + if "part" in kwargs: + part = kwargs.pop("part") + part = tuple([slice(*sss) for sss in part]) else: part = slice(None) - zf = zarr.open(*args, **kwargs) - zf[part] = self.data + zn = Zarr(source=kwargs.pop("source")) + store = zn._get_store() + + zf = zarr.open(store, *args, **kwargs) + if "output" in self.dims: + for key in self.coords["output"].data: + zf[key][part] = self.sel(output=key).data + else: + data_key = kwargs.get("data_key", "data") + zf[data_key][part] = self.data + r = zn.source else: try: getattr(self, "to_" + format)(*args, **kwargs) @@ -353,6 +366,8 @@ def open(cls, *args, **kwargs): # pass in kwargs to constructor uda_kwargs = {"attrs": da.attrs} + if "output" in da.dims: + uda_kwargs.update({"outputs": da.coords["output"]}) return cls.create(coords, data=da.data, **uda_kwargs) @classmethod @@ -479,16 +494,6 @@ def func(self, *args, **kwargs): del func -def create_dataarray(coords, data=np.nan, dtype=float, outputs=None, **kwargs): - """Deprecated. Use `UnitsDataArray.create()` in place. - """ - warnings.warn( - "The `create_dataarray` function is deprecated and will be removed in podpac 2.0. Use the classmethod `UnitsDataArray.create()` instead.", - DeprecationWarning, - ) - return UnitsDataArray.create(coords, data=data, outputs=outputs, dtype=dtype, **kwargs) - - def to_image(data, format="png", vmin=None, vmax=None, return_base64=False): """Return a base64-encoded image of data @@ -516,7 +521,9 @@ def to_image(data, format="png", vmin=None, vmax=None, return_base64=False): import matplotlib.cm from matplotlib.image import imsave - matplotlib.use("agg") + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + matplotlib.use("agg") if format != "png": raise ValueError("Invalid image format '%s', must be 'png'" % format) diff --git a/podpac/core/utils.py b/podpac/core/utils.py index bb0929ed3..65b7a40e9 100644 --- a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -10,19 +10,20 @@ import datetime import functools import importlib -from collections import OrderedDict import logging +from collections import OrderedDict from copy import deepcopy -from six import string_types -import lazy_import try: import urllib.parse as urllib except: # Python 2.7 import urlparse as urllib +from six import string_types +import lazy_import import traitlets as tl import numpy as np +import xarray as xr import pandas as pd # Core dependency of xarray # Optional Imports @@ -54,7 +55,7 @@ def _decorator(func): return _decorator -def trait_is_defined(obj, trait): +def trait_is_defined(obj, trait_name): """Utility method to determine if trait is defined on object without call to default (@tl.default) @@ -62,7 +63,7 @@ def trait_is_defined(obj, trait): ---------- object : object Class with traits - trait : str + trait_name : str Class property to investigate Returns @@ -71,7 +72,7 @@ def trait_is_defined(obj, trait): True if the trait exists on 
the object and is defined False if the trait does not exist on the object or the trait is not defined """ - return obj.has_trait(trait) and trait in obj._trait_values + return obj.has_trait(trait_name) and trait_name in obj._trait_values def create_logfile( @@ -165,13 +166,7 @@ def __init__(self, ndim=None, shape=None, dtype=None, dtypes=None, *args, **kwar def validate(self, obj, value): # coerce type if not isinstance(value, np.ndarray): - try: - value = np.array(value) - except: - raise tl.TraitError( - "The '%s' trait of an %s instance must be an np.ndarray, but a value of %s %s was specified" - % (self.name, obj.__class__.__name__, value, type(value)) - ) + value = np.array(value) # ndim if self.ndim is not None and self.ndim != value.ndim: @@ -208,9 +203,11 @@ def validate(self, obj, value): return tuple(value) -class NodeTrait(tl.ForwardDeclaredInstance): +class NodeTrait(tl.Instance): def __init__(self, *args, **kwargs): - super(NodeTrait, self).__init__("Node", *args, **kwargs) + from podpac import Node as _Node + + super(NodeTrait, self).__init__(_Node, *args, **kwargs) def validate(self, obj, value): super(NodeTrait, self).validate(obj, value) @@ -221,61 +218,50 @@ def validate(self, obj, value): class JSONEncoder(json.JSONEncoder): def default(self, obj): - # podpac Coordinates objects - if isinstance(obj, podpac.Coordinates): - return obj.definition - - # podpac Node objects - elif isinstance(obj, podpac.Node): - return obj.definition - - # podpac Style objects - elif isinstance(obj, podpac.core.style.Style): + # podpac objects with definitions + if isinstance(obj, (podpac.Coordinates, podpac.Node, podpac.data.Interpolation, podpac.core.style.Style)): return obj.definition - elif isinstance(obj, podpac.data.Interpolation): - return obj.definition + # podpac Interpolator type + if isinstance(obj, type) and obj in podpac.data.INTERPOLATORS: + return obj().definition # pint Units - elif isinstance(obj, podpac.core.units.ureg.Unit): + if isinstance(obj, podpac.core.units.ureg.Unit): return str(obj) - # numpy arrays - elif isinstance(obj, np.ndarray): - if np.issubdtype(obj.dtype, np.datetime64): - return obj.astype(str).tolist() - elif np.issubdtype(obj.dtype, np.timedelta64): - f = np.vectorize(podpac.core.coordinates.utils.make_timedelta_string) - return f(obj).tolist() - elif np.issubdtype(obj.dtype, np.number): - return obj.tolist() - # datetime64 - elif isinstance(obj, np.datetime64): + if isinstance(obj, np.datetime64): return obj.astype(str) # timedelta64 - elif isinstance(obj, np.timedelta64): + if isinstance(obj, np.timedelta64): return podpac.core.coordinates.utils.make_timedelta_string(obj) # datetime - elif isinstance(obj, datetime.datetime): + if isinstance(obj, (datetime.datetime, datetime.date)): return obj.isoformat() # dataframe - elif isinstance(obj, pd.DataFrame): + if isinstance(obj, pd.DataFrame): return obj.to_json() - # Interpolator - try: - if obj in podpac.core.data.interpolation.INTERPOLATORS: - interpolater_class = deepcopy(obj) - interpolator = interpolater_class() - return interpolator.definition - except Exception as e: - pass - - # default + # numpy array + if isinstance(obj, np.ndarray): + if np.issubdtype(obj.dtype, np.datetime64): + return obj.astype(str).tolist() + if np.issubdtype(obj.dtype, np.timedelta64): + return [podpac.core.coordinates.utils.make_timedelta_string(e) for e in obj] + if np.issubdtype(obj.dtype, np.number): + return obj.tolist() + else: + try: + # completely serialize the individual elements using the custom encoder + return 
json.loads(json.dumps([e for e in obj], cls=JSONEncoder)) + except TypeError as e: + raise TypeError("Cannot serialize numpy array\n%s" % e) + + # raise the TypeError return json.JSONEncoder.default(self, obj) @@ -306,18 +292,28 @@ def _get_query_params_from_url(url): return params -def _get_from_url(url): +def _get_from_url(url, session=None): """Helper function to get data from an url with error checking. - + Parameters - ----------- - auth_session: podpac.core.authentication.EarthDataSession - Authenticated EDS session - url: str + ---------- + url : str URL to website + session : :class:`requests.Session`, optional + Requests session to use when making the GET request to `url` + + Returns + ------- + str + Text response from request. + See https://2.python-requests.org/en/master/api/#requests.Response.text """ try: - r = requests.get(url) + if session is None: + r = requests.get(url) + else: + r = session.get(url) + if r.status_code != 200: _log.warning( "Could not connect to {}, status code {}. \n *** Return Text *** \n {} \n *** End Return Text ***".format( @@ -330,4 +326,125 @@ def _get_from_url(url): r = None except RuntimeError as e: _log.warning("Cannot authenticate to {}. Check credentials. Error was as follows:".format(url) + str(e)) - return r.text + + return r + + +def cached_property(*args, **kwargs): + """ + Decorator that creates a property that is cached. + + Keyword Arguments + ----------------- + use_cache_ctrl : bool + If True, the property is cached using the Node cache_ctrl. If False, the property is only cached as a private + attribute. Default False. + + Notes + ----- + Podpac caching using the cache_ctrl will be unreliable if the property depends on any non-tagged traits. + The property should only use node attrs (traits tagged with ``attr=True``). + + Examples + -------- + + >>> class MyNode(Node): + # property that is recomputed every time + @property + def my_property(self): + return 0 + + # property is computed once for each object + @cached_property + def my_cached_property(self): + return 1 + + # property that is computed once and can be reused by other Nodes or sessions, depending on the cache_ctrl + @cached_property(use_cache_ctrl=True) + def my_persistent_cached_property(self): + return 2 + """ + + use_cache_ctrl = kwargs.pop("use_cache_ctrl", False) + + if args and (len(args) != 1 or not callable(args[0])): + raise TypeError("cached_property decorator does not accept any positional arguments") + + if kwargs: + raise TypeError("cached_property decorator does not accept keyword argument '%s'" % list(kwargs.keys())[0]) + + def d(fn): + key = "_podpac_cached_property_%s" % fn.__name__ + + @property + def wrapper(self): + if hasattr(self, key): + value = getattr(self, key) + elif use_cache_ctrl and self.has_cache(key): + value = self.get_cache(key) + setattr(self, key, value) + else: + value = fn(self) + setattr(self, key, value) + if use_cache_ctrl: + self.put_cache(value, key) + return value + + return wrapper + + if args: + return d(args[0]) + else: + return d + + +def ind2slice(Is): + """ Convert boolean and integer index arrays to slices. + + Integer and boolean arrays are converted to slices that span the selected elements, but may include additional + elements. If possible, the slices are stepped. 
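    For example (values mirror the unit tests above): an integer index array such as
    [1, 2, 4] becomes slice(1, 5), a boolean mask such as
    [False, True, True, False, True, False] also becomes slice(1, 5), and an evenly
    stepped index array such as [1, 3, 5] becomes the stepped slice(1, 7, 2); plain
    integers and existing slices pass through unchanged.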
+ + Arguments + --------- + Is : tuple + tuple of indices (slice, integer array, boolean array, or single integer) + + Returns + ------- + Js : tuple + tuple of slices + """ + + if isinstance(Is, tuple): + return tuple(_ind2slice(I) for I in Is) + else: + return _ind2slice(Is) + + +def _ind2slice(I): + # already a slice + if isinstance(I, slice): + return I + + # convert to numpy array + I = np.atleast_1d(I) + + # convert boolean array to index array + if I.dtype == bool: + (I,) = np.where(I) + + # empty slice + if I.size == 0: + return slice(0, 0) + + # singleton + if I.size == 1: + return I[0] + + # stepped slice + diff = np.diff(I) + if diff.size and np.all(diff == diff[0]) and diff[0] != 0: + return slice(I.min(), I.max() + diff[0], diff[0]) + + # non-stepped slice + return slice(I.min(), I.max() + 1) diff --git a/podpac/data.py b/podpac/data.py index eec916743..df55d1270 100644 --- a/podpac/data.py +++ b/podpac/data.py @@ -5,18 +5,23 @@ # REMINDER: update api docs (doc/source/user/api.rst) to reflect changes to this file from podpac.core.data.datasource import DataSource -from podpac.core.data.interpolation import ( +from podpac.core.data.array_source import Array +from podpac.core.data.pydap_source import PyDAP +from podpac.core.data.rasterio_source import Rasterio +from podpac.core.data.h5py_source import H5PY +from podpac.core.data.csv_source import CSV +from podpac.core.data.dataset_source import Dataset +from podpac.core.data.zarr_source import Zarr +from podpac.core.data.ogc import WCS +from podpac.core.data.reprojection import ReprojectedSource + +from podpac.core.interpolation.interpolation import ( Interpolation, InterpolationException, - interpolation_trait, + InterpolationTrait, INTERPOLATION_DEFAULT, INTERPOLATORS, INTERPOLATION_METHODS, INTERPOLATORS_DICT, INTERPOLATION_METHODS_DICT, ) -from podpac.core.data.array_source import Array -from podpac.core.data.pydap_source import PyDAP -from podpac.core.data.file import Rasterio, H5PY, CSV, Dataset, Zarr -from podpac.core.data.ogc import WCS -from podpac.core.data.reprojection import ReprojectedSource diff --git a/podpac/datalib/__init__.py b/podpac/datalib/__init__.py index 31faf1075..7fa282dbd 100644 --- a/podpac/datalib/__init__.py +++ b/podpac/datalib/__init__.py @@ -25,4 +25,4 @@ # intake requires python >= 3.6 if sys.version >= "3.6": - from podpac.datalib.intake import IntakeCatalog + from podpac.datalib.intake_catalog import IntakeCatalog diff --git a/podpac/datalib/airmoss.py b/podpac/datalib/airmoss.py deleted file mode 100644 index 651f9f771..000000000 --- a/podpac/datalib/airmoss.py +++ /dev/null @@ -1,217 +0,0 @@ -""" -Airmoss summary -""" - -from __future__ import division, unicode_literals, print_function, absolute_import - -import re -from collections import OrderedDict - -import requests -from bs4 import BeautifulSoup -import numpy as np -import traitlets as tl - -# Internal dependencies -import podpac -from podpac.core.data import types as datatype - - -class AirMOSS_Source(datatype.PyDAP): - """Summary - - Attributes - ---------- - datakey : TYPE - Description - date_url_re : TYPE - Description - nan_vals : list - Description - product : TYPE - Description - """ - - product = tl.Enum(["L4RZSM"], default_value="L4RZSM") - date_url_re = re.compile("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}") - datakey = tl.Unicode("sm1") - nan_vals = [-9999.0] - - def get_native_coordinates(self): - """Summary - - Returns - ------- - TYPE - Description - """ - try: - return 
self.load_cached_obj("native.coordinates") - except: - pass - - ds = self.dataset - base_date = ds["time"].attributes["units"] - base_date = self.date_url_re.search(base_date).group() - times = (ds["time"][:]).astype("timedelta64[h]") + np.array(base_date, "datetime64") - - lons = podpac.crange(ds["lon"][0], ds["lon"][-1], ds["lon"][1] - ds["lon"][0]) - lats = podpac.crange(ds["lat"][0], ds["lat"][-1], ds["lat"][1] - ds["lat"][0]) - coords = podpac.Coordinates([times, lats, lons], dims=["time", "lat", "lon"]) - self.cache_obj(coords, "native.coordinates") - - return coords - - def get_data(self, coordinates, coordinates_index): - """Summary - - Parameters - ---------- - coordinates : TYPE - Description - coordinates_index : TYPE - Description - - Returns - ------- - TYPE - Description - """ - data = self.dataset[self.datakey].array[tuple(coordinates_index)] - d = self.create_output_array(coordinates, data=data.reshape(coordinates.shape)) - return d - - -class AirMOSS_Site(podpac.OrderedCompositor): - """Summary - - Attributes - ---------- - base_dir_url : TYPE - Description - base_url : TYPE - Description - date_url_re : TYPE - Description - product : TYPE - Description - site : TYPE - Description - """ - - product = tl.Enum(["L4RZSM"], default_value="L4RZSM") - base_url = tl.Unicode("https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1421") - base_dir_url = tl.Unicode("https://thredds.daac.ornl.gov/thredds/catalog/ornldaac/1421/catalog.html") - site = tl.Unicode("") - date_url_re = re.compile("[0-9]{8}") - - def get_native_coordinates(self): - """Summary - - Returns - ------- - TYPE - Description - """ - try: - return self.load_cached_obj("native.coordinates") - except: - pass - - ds = self.dataset - times = self.get_available_dates() - lons = podpac.crange(ds["lon"][0], ds["lon"][-1], ds["lon"][1] - ds["lon"][0]) - lats = podpac.crange(ds["lat"][0], ds["lat"][-1], ds["lat"][1] - ds["lat"][0]) - coords = podpac.Coordinates([times, lats, lons], dims=["time", "lat", "lon"]) - self.cache_obj(coords, "native.coordinates") - - return coords - - def get_available_dates(self): - """Summary - - Returns - ------- - TYPE - Description - """ - soup = BeautifulSoup(requests.get(self.base_dir_url).text, "lxml") - a = soup.find_all("a") - regex = self.date_url_re - - times = [] - for aa in a: - text = aa.get_text() - if self.site in text: - m = regex.search(text) - if m: - t = m.group() - times.append(np.datetime64("-".join([t[:4], t[4:6], t[6:]]))) - times.sort() - return np.array(times) - - -class AirMOSS(podpac.OrderedCompositor): - """Summary - - Attributes - ---------- - product : TYPE - Description - site_url_re : TYPE - Description - """ - - product = tl.Enum(["L4RZSM"], default_value="L4RZSM") - site_url_re = tl.Any() - - @tl.default("site_url_re") - def get_site_url_re(self): - """Summary - - Returns - ------- - TYPE - Description - """ - return re.compile(self.product + "_.*_" + "[0-9]{8}.*\.nc4") - - def get_available_sites(self): - """Summary - - Returns - ------- - TYPE - Description - """ - soup = BeautifulSoup(requests.get(self.base_dir_url).text, "lxml") - a = soup.find_all("a") - regex = self.site_url_re - - sites = OrderedDict() - for aa in a: - text = aa.get_text() - m = regex.match(text) - if m: - site = text.split("_")[1] - sites[site] = 1 + sites.get(site, 0) - - return sites - - -if __name__ == "__main__": - ams = AirMOSS_Site(interpolation="nearest_preview", site="BermsP") - print(ams.native_coordinates) - - source = 
"https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1421/L4RZSM_BermsP_20121025_v5.nc4" - am = AirMOSS_Source(source=source, interpolation="nearest_preview") - coords = am.native_coordinates - print(coords) - print(coords["time"].area_bounds) - - lat, lon = am.native_coordinates.coords["lat"], am.native_coordinates.coords["lon"] - lat = lat[::10][np.isfinite(lat[::10])] - lon = lon[::10][np.isfinite(lon[::10])] - coords = podpac.Coordinates([lat, lon], dims=["lat", "lon"]) - o = am.eval(coords) - print("Done") diff --git a/podpac/datalib/cosmos_stations.py b/podpac/datalib/cosmos_stations.py new file mode 100644 index 000000000..ad1b68b17 --- /dev/null +++ b/podpac/datalib/cosmos_stations.py @@ -0,0 +1,402 @@ +from __future__ import division, unicode_literals, print_function, absolute_import + +from six import string_types +import traitlets as tl +import re +import numpy as np +from dateutil import parser +import requests +import json + +try: + import cPickle # Python 2.7 +except: + import _pickle as cPickle +from io import StringIO + +from podpac.core.utils import _get_from_url + +# Optional dependencies +from lazy_import import lazy_module + +bs4 = lazy_module("bs4") + +import podpac +from podpac.core.utils import cached_property + + +def _convert_str_to_vals(properties): + IGNORE_KEYS = ["sitenumber"] + for k, v in properties.items(): + if not isinstance(v, string_types) or k in IGNORE_KEYS: + continue + try: + if "," in v: + properties[k] = tuple([float(vv) for vv in v.split(",")]) + else: + properties[k] = float(v) + except ValueError: + try: + properties[k] = np.datetime64(v) + except ValueError: + pass + return properties + + +class COSMOSStation(podpac.data.DataSource): + _repr_keys = ["label", "network", "location"] + + url = tl.Unicode("http://cosmos.hwr.arizona.edu/Probes/StationDat/") + station_data = tl.Dict().tag(attr=True) + + @tl.default("interpolation") + def _interpolation_default(self): + return {"method": "nearest", "params": {"spatial_tolerance": 1.1, "time_tolerance": np.timedelta64(1, "D")}} + + @cached_property + def raw_data(self): + r = requests.get(self.station_data_url) + return r.text + + @cached_property + def data_columns(self): + return self.raw_data.split("\n", 1)[0].split(" ") + + @property + def site_number(self): + return str(self.station_data["sitenumber"]) + + @property + def station_data_url(self): + return self.url + self.site_number + "/smcounts.txt" + + @property + def station_calibration_url(self): + return self.url + self.site_number + "/calibrationInfo.php" + + @property + def station_properties_url(self): + return self.url + self.site_number + "/index.php" + + def get_data(self, coordinates, coordinates_index): + data = np.loadtxt(StringIO(self.raw_data), skiprows=1, usecols=self.data_columns.index("SOILM"))[ + coordinates_index[0] + ] + data[data > 100] = np.nan + data[data < 0] = np.nan + data /= 100.0 # Make it fractional + return self.create_output_array(coordinates, data=data[:, None, None]) + + def get_coordinates(self): + lat_lon = self.station_data["location"] + time = np.loadtxt( + StringIO(self.raw_data), + skiprows=1, + usecols=[self.data_columns.index("YYYY-MM-DD"), self.data_columns.index("HH:MM")], + dtype=str, + ) + time = np.array([t[0] + "T" + t[1] for t in time], np.datetime64) + c = podpac.Coordinates([time, lat_lon[0], lat_lon[1]], ["time", "lat", "lon"]) + return c + + @property + def label(self): + return self.station_data["label"] + + @property + def network(self): + return self.station_data["network"] + + @property + 
def location(self): + return self.station_data["location"] + + @cached_property(use_cache_ctrl=True) + def calibration_data(self): + cd = _get_from_url(self.station_calibration_url).json() + cd["items"] = [_convert_str_to_vals(i) for i in cd["items"]] + return cd + + @cached_property(use_cache_ctrl=True) + def site_properties(self): + r = _get_from_url(self.station_properties_url) + soup = bs4.BeautifulSoup(r.text, "lxml") + regex = re.compile("Soil Organic Carbon") + loc = soup.body.findAll(text=regex)[0].parent.parent + label, value = loc.findAll("div") + labels = [l.strip() for l in label.children if "br" not in str(l)] + values = [l.strip() for l in value.children if "br" not in str(l) and l.strip() != ""] + + properties = {k: v for k, v in zip(labels, values)} + + return _convert_str_to_vals(properties) + + +class COSMOSStations(podpac.compositor.OrderedCompositor): + url = tl.Unicode("http://cosmos.hwr.arizona.edu/Probes/") + stations_url = tl.Unicode("sitesNoLegend.js") + + ## PROPERTIES + @cached_property(use_cache_ctrl=True) + def _stations_data_raw(self): + url = self.url + self.stations_url + r = _get_from_url(url) + t = r.text + + # Fix the JSON + t_f = re.sub(':\s?",', ': "",', t) # Missing closing parenthesis + if t_f[-5:] == ",\n]}\n": # errant comma + t_f = t_f[:-5] + "\n]}\n" + + return t_f + + @cached_property + def stations_data(self): + stations = json.loads(self._stations_data_raw) + stations["items"] = [_convert_str_to_vals(i) for i in stations["items"]] + return stations + + @cached_property(use_cache_ctrl=True) + def source_coordinates(self): + lat_lon = np.array(self.stations_value("location")) + c = podpac.Coordinates([[lat_lon[:, 0], lat_lon[:, 1]]], ["lat_lon"]) + return c + + @cached_property + def sources(self): + return np.array([COSMOSStation(station_data=item) for item in self.stations_data["items"]]) + + @property + def available_data_keys(self): + return list(self.stations_data["items"][0].keys()) + + ## UTILITY FUNCTIONS + def stations_value(self, key, stations_data=None): + """ Returns a list of values for all the station for a particular key + + Parameters + ----------- + key: str + Key describing the station data. See self.available_data_keys for available keys. + + Returns + -------- + list + A list of the values for the keys for each station + """ + if key not in self.available_data_keys: + raise ValueError("Input key {} is not in available keys {}".format(key, self.available_data_keys)) + + return self._stations_value(key, stations_data) + + def _stations_value(self, key, stations_data=None): + """ helper function for stations_value + """ + if stations_data is None: + stations_data = self.stations_data + + return [i[key] for i in stations_data["items"]] + + @property + def stations_label(self): + return self.stations_value("label") + + def label_from_latlon(self, lat_lon): + """ Returns the COSMOS station's label given it's lat/lon coordinates + + Parameters + ----------- + lat_lon : podpac.Coordinates + The lat/lon locations whose station name will be returned. Note, the lat/lon coordinates have to match + exactly the coordinates given in station_data[N]['location'], where N is the station. + This should be Coordinates object with 'lat_lon' stacked coordinates as one of the dimensions. + + Returns + -------- + list + List of COSMOS station names corresponding to the given coordinates. If a coordinate has no match, then + "None" is returned. 
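        Example (an illustrative sketch; "Manitou" is simply a sample label, as used in
        this module's __main__ block)::

            stations = COSMOSStations()
            lat_lon = stations.latlon_from_label("Manitou")   # stacked 'lat_lon' Coordinates
            labels = stations.label_from_latlon(lat_lon)       # recover the matching labels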
+ """ + if "lon_lat" in lat_lon.dims: + lat_lon = lat_lon.transpose("lon_lat") + elif "lat_lon" not in lat_lon.dims: + raise ValueError("The coordinates object must have a stacked 'lat_lon' dimension.") + + labels_map = {s["location"]: s["label"] for s in self.stations_data["items"]} + labels = [labels_map.get(ll, None) for ll in lat_lon.coords["lat_lon"]] + return labels + + def latlon_from_label(self, label): + """ Returns the lat/lon coordinates of COSMOS stations that match the given labels + + Parameters + ------------ + label: str, list + Strings that partially describe a COSMOS station label. + + Returns + -------- + podpac.Coordinates + The coordinates of the COSMOS stations matching the input data + """ + if not isinstance(label, list): + label = [label] + + ind = self._get_label_inds(label) + if ind.size == 0: + return podpac.Coordinates([]) # Empty + + return self.source_coordinates[ind] + + def _get_label_inds(self, label): + """ Helper function to get source indices for partially matched labels """ + ind = [] + for lab in label: + ind.extend([i for i, l in enumerate(self.stations_label) if lab.lower() in l.lower()]) + + ind = np.unique(ind) + return ind + + def get_calibration_data(self, label=None, lat_lon=None): + """ Returns the calibration information for a station. Users must supply a label or lat_lon coordinates. + + Parameters + ------------ + label: str, List (optional) + Labels describing the station. + + lat_lon: podpac.Coordinates (optional) + Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly + with the COSMOS stations. + + Returns + -------- + list + A list of dictionaries containing the calibration data for the requested stations. + """ + + if label is None and lat_lon is None: + raise ValueError("Must supply either 'label' or 'lat_lon'") + + if lat_lon is not None: + label = self.label_from_latlon(lat_lon) + + if isinstance(label, string_types): + label = [label] + + inds = self._get_label_inds(label) + + return [self.sources[i].calibration_data for i in inds] + + def get_site_properties(self, label=None, lat_lon=None): + """ Returns the site properties for a station. Users must supply a label or lat_lon coordinates. + + Parameters + ------------ + label: str, List (optional) + Labels describing the station. + + lat_lon: podpac.Coordinates (optional) + Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly + with the COSMOS stations. + + Returns + -------- + list + A list of dictionaries containing the properties for the requested stations. + """ + + if label is None and lat_lon is None: + raise ValueError("Must supply either 'label' or 'lat_lon'") + + if lat_lon is not None: + label = self.label_from_latlon(lat_lon) + + if isinstance(label, string_types): + label = [label] + + inds = self._get_label_inds(label) + + return [self.sources[i].site_properties for i in inds] + + def get_station_data(self, label=None, lat_lon=None): + """ Returns the station data. Users must supply a label or lat_lon coordinates. + + Parameters + ------------ + label: str, List (optional) + Labels describing the station. + + lat_lon: podpac.Coordinates (optional) + Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly + with the COSMOS stations. + + Returns + -------- + list + A list of dictionaries containing the data for the requested stations. 
+ """ + + if label is None and lat_lon is None: + raise ValueError("Must supply either 'label' or 'lat_lon'") + + if lat_lon is not None: + label = self.label_from_latlon(lat_lon) + + if isinstance(label, string_types): + label = [label] + + inds = self._get_label_inds(label) + + return [self.stations_data["items"][i] for i in inds] + + +if __name__ == "__main__": + bounds = {"lat": [40, 46], "lon": [-78, -68]} + cs = COSMOSStations(cache_ctrl=["ram", "disk"]) + + sd = cs.stations_data + ci = cs.source_coordinates.select(bounds) + ce = podpac.coordinates.merge_dims( + [podpac.Coordinates([podpac.crange("2018-05-01", "2018-06-01", "1,D", "time")]), ci] + ) + o = cs.eval(ce) + + # Test helper functions + labels = cs.stations_label + lat_lon = cs.latlon_from_label("Manitou") + labels = cs.label_from_latlon(lat_lon) + lat_lon2 = cs.latlon_from_label("No Match Here") + cal = cs.get_calibration_data("Manitou") + props = cs.get_site_properties("Manitou") + + from matplotlib import rcParams + + rcParams["axes.labelsize"] = 12 + rcParams["xtick.labelsize"] = 10 + rcParams["ytick.labelsize"] = 10 + rcParams["legend.fontsize"] = 8 + rcParams["lines.linewidth"] = 2 + rcParams["font.size"] = 12 + + import matplotlib.pyplot as plt + import matplotlib.dates as mdates + from pandas.plotting import register_matplotlib_converters + + register_matplotlib_converters() + + fig = plt.figure(figsize=(6.5, 3), dpi=300) + plt.plot(o.time, o.data, "o-") + ax = plt.gca() + plt.ylim(0, 1) + plt.legend(cs.label_from_latlon(ce)) + plt.ylabel("Soil Moisture ($m^3/m^3$)") + plt.xlabel("Date") + # plt.xticks(rotation=90) + fig.autofmt_xdate() + ax.fmt_xdata = mdates.DateFormatter("%m-%d") + plt.title("COSMOS Data for 2018 over lat (40, 46) by lon (-78,-68)") + plt.tight_layout() + plt.show() + + print("Done") diff --git a/podpac/datalib/drought_monitor.py b/podpac/datalib/drought_monitor.py index 9b94581fc..750a35f20 100644 --- a/podpac/datalib/drought_monitor.py +++ b/podpac/datalib/drought_monitor.py @@ -1,9 +1,7 @@ -from podpac.core.node import Node -from podpac.core.style import Style -from podpac.core.utils import NodeTrait -from podpac.core.algorithm.algorithm import Algorithm -from podpac.core.data.file import Zarr -from podpac.core.coordinates import ArrayCoordinates1d +from podpac.algorithm import Algorithm +from podpac.data import Zarr +from podpac.style import Style +from podpac.utils import NodeTrait def drought_style(): @@ -25,7 +23,6 @@ def sm_style(): class DroughtMonitorCategory(Zarr): - # dims = ["lat", "lon", "time"] cf_time = True cf_units = "days since 2018-01-01 00:00:00" cf_calendar = "proleptic_gregorian" @@ -33,12 +30,12 @@ class DroughtMonitorCategory(Zarr): class DroughtCategory(Algorithm): - soil_moisture = NodeTrait() - d0 = NodeTrait() - d1 = NodeTrait() - d2 = NodeTrait() - d3 = NodeTrait() - d4 = NodeTrait() + soil_moisture = NodeTrait().tag(attr=True) + d0 = NodeTrait().tag(attr=True) + d1 = NodeTrait().tag(attr=True) + d2 = NodeTrait().tag(attr=True) + d3 = NodeTrait().tag(attr=True) + d4 = NodeTrait().tag(attr=True) style = drought_style() def algorithm(self, inputs): @@ -61,29 +58,55 @@ def algorithm(self, inputs): if __name__ == "__main__": + import os + import numpy as np import podpac c = podpac.Coordinates([46.6, -123.5, "2018-06-01"], dims=["lat", "lon", "time"]) # local path = "droughtmonitor/beta_parameters.zarr" - d0 = DroughtMonitorCategory(source=path, datakey="d0") - print(d0.native_coordinates) - print(d0.eval(c)) + if not os.path.exists(path): + print("No local drought 
monitor data found at '%s'" % path) + else: + # drought monitor parameters + d0 = DroughtMonitorCategory(source=path, data_key="d0") + print(d0.coordinates) + print(d0.eval(c)) + + # drought category + mock_sm = podpac.data.Array(data=np.random.random(d0.coordinates.shape), coordinates=d0.coordinates) + + category = DroughtCategory( + soil_moisture=mock_sm, + d0=DroughtMonitorCategory(source=path, data_key="d0"), + d1=DroughtMonitorCategory(source=path, data_key="d1"), + d2=DroughtMonitorCategory(source=path, data_key="d2"), + d3=DroughtMonitorCategory(source=path, data_key="d3"), + d4=DroughtMonitorCategory(source=path, data_key="d4"), + ) + print(category.eval(c)) # s3 bucket = "podpac-internal-test" store = "drought_parameters.zarr" path = "s3://%s/%s" % (bucket, store) - d0 = DroughtMonitorCategory(source=path, datakey="d0") - print(d0.native_coordinates) - print(d0.eval(c)) - - # the Zarr node uses the podpac AWS settings by default, but credentials can be explicitly provided, too - d0 = DroughtMonitorCategory( - source=path, - datakey="d0", - access_key_id=podpac.settings["AWS_ACCESS_KEY_ID"], - secret_access_key=podpac.settings["AWS_SECRET_ACCESS_KEY"], - region_name=podpac.settings["AWS_REGION_NAME"], - ) + d0 = DroughtMonitorCategory(source=path, data_key="d0") + if not d0.s3.exists(path): + print("No drought monitor data found at '%s'. Check your AWS credentials." % path) + else: + print(d0.coordinates) + print(d0.eval(c)) + + # drought category algorithm + mock_sm = podpac.data.Array(source=np.random.random(d0.coordinates.shape), coordinates=d0.coordinates) + + category = DroughtCategory( + soil_moisture=mock_sm, + d0=DroughtMonitorCategory(source=path, data_key="d0"), + d1=DroughtMonitorCategory(source=path, data_key="d1"), + d2=DroughtMonitorCategory(source=path, data_key="d2"), + d3=DroughtMonitorCategory(source=path, data_key="d3"), + d4=DroughtMonitorCategory(source=path, data_key="d4"), + ) + print(category.eval(c)) diff --git a/podpac/datalib/egi.py b/podpac/datalib/egi.py index f6497109b..8b943e6ce 100644 --- a/podpac/datalib/egi.py +++ b/podpac/datalib/egi.py @@ -29,6 +29,7 @@ from podpac.data import DataSource from podpac import authentication from podpac import settings +from podpac import cached_property from podpac.core.units import UnitsDataArray from podpac.core.node import node_eval @@ -84,10 +85,10 @@ class EGI(DataSource): If this setting is not defined, the node will attempt to generate a token using :attr:`self.username` and :attr:`self.password` username : str, optional - EarthData username (https://urs.earthdata.nasa.gov/) + Earthdata username (https://urs.earthdata.nasa.gov/) If undefined, node will look for a username under setting key "username@urs.earthdata.nasa.gov" password : str, optional - EarthData password (https://urs.earthdata.nasa.gov/) + Earthdata password (https://urs.earthdata.nasa.gov/) If undefined, node will look for a password under setting key "password@urs.earthdata.nasa.gov" Attributes @@ -96,11 +97,7 @@ class EGI(DataSource): The data array compiled from downloaded EGI data """ - base_url = tl.Unicode().tag(attr=True) - - @tl.default("base_url") - def _base_url_default(self): - return BASE_URL + base_url = tl.Unicode(default_value=BASE_URL).tag(attr=True) # required short_name = tl.Unicode().tag(attr=True) @@ -169,7 +166,7 @@ def coverage(self): data = tl.Any(allow_none=True) _url = tl.Unicode(allow_none=True) - @property + @cached_property def source(self): """ URL Endpoint built from input parameters @@ -201,13 +198,14 @@ def 
_append(u, key, val): # other parameters are included at eval time return url - def get_native_coordinates(self): - if self.data is not None: - return Coordinates.from_xarray(self.data.coords, crs=self.data.attrs["crs"]) - else: + @property + def coordinates(self): + if self.data is None: _log.warning("No coordinates found in EGI source") return Coordinates([], dims=[]) + return Coordinates.from_xarray(self.data.coords, crs=self.data.attrs["crs"]) + def get_data(self, coordinates, coordinates_index): if self.data is not None: da = self.data[coordinates_index] @@ -229,8 +227,6 @@ def eval(self, coordinates, output=None): " which case EGI is returning no data." ) raise e - # Force update on native_coordinates (in case of multiple evals) - self.set_trait("native_coordinates", self.get_native_coordinates()) # run normal eval once self.data is prepared return super(EGI, self).eval(coordinates, output) @@ -313,6 +309,7 @@ def _download(self, coordinates): zipfile.ZipFile Returns zip file byte-str to downloaded data """ + # Ensure Coordinates are in decimal lat-lon coordinates = coordinates.transform("epsg:4326") self._authenticate() @@ -499,7 +496,7 @@ def token_valid(self): def get_token(self): """ - Get token for EGI interface using EarthData credentials + Get token for EGI interface using Earthdata credentials Returns ------- @@ -509,7 +506,7 @@ def get_token(self): Raises ------ ValueError - Raised if EarthData username or password is unavailable + Raised if Earthdata username or password is unavailable """ # token access URL url = "https://cmr.earthdata.nasa.gov/legacy-services/rest/tokens" @@ -517,12 +514,12 @@ def get_token(self): if self.username is not None: settings["username@EGI"] = self.username else: - raise ValueError("No EarthData username available to request EGI token") + raise ValueError("No Earthdata username available to request EGI token") if self.password is not None: settings["password@EGI"] = self.password else: - raise ValueError("No EarthData password available to request EGI token") + raise ValueError("No Earthdata password available to request EGI token") _ip = self._get_ip() request = """ diff --git a/podpac/datalib/gfs.py b/podpac/datalib/gfs.py index 964770126..b65147a47 100644 --- a/podpac/datalib/gfs.py +++ b/podpac/datalib/gfs.py @@ -1,138 +1,152 @@ from __future__ import division, unicode_literals, print_function, absolute_import -import logging import datetime import traitlets as tl import numpy as np -# Helper utility for optional imports from lazy_import import lazy_module -# Optional Imports -rasterio = lazy_module("rasterio") -boto3 = lazy_module("boto3") -botocore = lazy_module("botocore") +s3fs = lazy_module("s3fs") # Internal imports from podpac.data import DataSource, Rasterio from podpac.coordinates import Coordinates, merge_dims +from podpac.utils import cached_property, DiskCacheMixin +from podpac.core.authentication import S3Mixin BUCKET = "noaa-gfs-pds" -s3 = boto3.resource("s3") -s3.meta.client.meta.events.register("choose-signer.s3.*", botocore.handlers.disable_signing) -bucket = s3.Bucket(BUCKET) -# TODO add time to native_coordinates -class GFSSource(Rasterio): +class GFSSource(DiskCacheMixin, Rasterio): parameter = tl.Unicode().tag(attr=True) level = tl.Unicode().tag(attr=True) date = tl.Unicode().tag(attr=True) hour = tl.Unicode().tag(attr=True) forecast = tl.Unicode().tag(attr=True) - def init(self): - self._logger = logging.getLogger(__name__) - - # check if the key exists - try: - s3.Object(BUCKET, self._key).load() - except 
botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - raise ValueError("Not found: '%s'" % self._key) # TODO list options - else: - raise - - @property - def _key(self): - return "%s/%s/%s/%s/%s" % (self.parameter, self.level, self.date, self.hour, self.forecast) - - @tl.default("nan_vals") - def _get_nan_vals(self): - return [self.dataset.nodata] - # return list(self.dataset.nodatavals) # which? - @property def source(self): - return self._key - - @tl.default("dataset") - def open_dataset(self): - """Opens the data source""" - - cache_key = "fileobj" - with rasterio.MemoryFile() as f: - if self.cache_ctrl and self.has_cache(key=cache_key): - data = self.get_cache(key=cache_key) - f.write(data) - else: - self._logger.info("Downloading S3 fileobj (Bucket: %s, Key: %s)" % (BUCKET, self._key)) - s3.Object(BUCKET, self._key).download_fileobj(f) - f.seek(0) - self.cache_ctrl and self.put_cache(f.read(), key=cache_key) - f.seek(0) - - dataset = f.open() - - return dataset + return "s3://%s/%s/%s/%s/%s/%s" % (BUCKET, self.parameter, self.level, self.date, self.hour, self.forecast) -class GFS(DataSource): +# TODO time interpolation +class GFS(S3Mixin, DiskCacheMixin, DataSource): parameter = tl.Unicode().tag(attr=True) level = tl.Unicode().tag(attr=True) date = tl.Unicode().tag(attr=True) hour = tl.Unicode().tag(attr=True) - @property - def source(self): - return "%s/%s/%s/%s" % (self.parameter, self.level, self.date, self.hour) + cache_coordinates = tl.Bool(True) - def init(self): - # TODO check prefix and the options at the next level - - self._prefix = "%s/%s/%s/%s/" % (self.parameter, self.level, self.date, self.hour) - self.forecasts = [obj.key.replace(self._prefix, "") for obj in bucket.objects.filter(Prefix=self._prefix)] + @property + def prefix(self): + return "%s/%s/%s/%s/%s/" % (BUCKET, self.parameter, self.level, self.date, self.hour) - if not self.forecasts: - raise ValueError("Not found: '%s/*'" % self._prefix) + @cached_property(use_cache_ctrl=True) + def forecasts(self): + return [path.replace(self.prefix, "") for path in self.s3.find(self.prefix)] + @cached_property + def sources(self): params = { "parameter": self.parameter, "level": self.level, "date": self.date, "hour": self.hour, "cache_ctrl": self.cache_ctrl, + "s3": self.s3, } - self._sources = np.array([GFSSource(forecast=h, **params) for h in self.forecasts]) # can we load this lazily? 
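        # The "can we load this lazily?" question above is resolved below by making
        # `sources` a cached property. A minimal sketch of the pattern used throughout
        # this changeset (assuming podpac.utils.cached_property memoizes on first
        # access, and use_cache_ctrl=True additionally persists the value through the
        # node's cache_ctrl; `expensive_s3_listing` is a hypothetical placeholder):
        #
        #   class Example(DataSource):
        #       @cached_property(use_cache_ctrl=True)
        #       def listing(self):
        #           return expensive_s3_listing()  # computed once, then served from cache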
+ return np.array([GFSSource(forecast=forecast, **params) for forecast in self.forecasts]) - nc = self._sources[0].native_coordinates + def get_coordinates(self): + nc = self.sources[0].coordinates base_time = datetime.datetime.strptime("%s %s" % (self.date, self.hour), "%Y%m%d %H%M") forecast_times = [base_time + datetime.timedelta(hours=int(h)) for h in self.forecasts] - tc = Coordinates([[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]], dims=["time"], crs=nc.crs) - self.set_trait("native_coordinates", merge_dims([nc, tc])) + tc = Coordinates( + [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]], dims=["time"], crs=nc.crs, validate_crs=False + ) + return merge_dims([nc, tc]) def get_data(self, coordinates, coordinates_index): data = self.create_output_array(coordinates) - for i, source in enumerate(self._sources[coordinates_index[2]]): + for i, source in enumerate(self.sources[coordinates_index[2]]): data[:, :, i] = source.eval(coordinates.drop("time")) return data -class GFSLatest(GFS): - # TODO raise exception if date or hour is in init args - - def init(self): - now = datetime.datetime.now() - - # date - self.set_trait("date", now.strftime("%Y%m%d")) - - # hour - prefix = "%s/%s/%s/" % (self.parameter, self.level, self.date) - objs = bucket.objects.filter(Prefix=prefix) - hours = set(obj.key.split("/")[3] for obj in objs) - if hours: - self.set_trait("hour", max(hours)) - - super(GFSLatest, self).init() +def GFSLatest(parameter=None, level=None, **kwargs): + # date + date = datetime.datetime.now().strftime("%Y%m%d") + + # hour + prefix = "%s/%s/%s/%s/" % (BUCKET, parameter, level, date) + s3 = s3fs.S3FileSystem(anon=True) + hours = set([path.replace(prefix, "")[:4] for path in s3.find(prefix)]) + if not hours: + raise RuntimeError("No data found at '%s'" % prefix) + hour = max(hours) + + # node + return GFS(parameter=parameter, level=level, date=date, hour=hour, **kwargs) + + +if __name__ == "__main__": + import datetime + import podpac + + # switch to 'disk' cache to cache s3 data + cache_ctrl = ["ram"] + # cache_ctrl = ['ram', 'disk'] + + parameter = "SOIM" + level = "0-10 m DPTH" + + now = datetime.datetime.now() + yesterday = now - datetime.timedelta(1) + tomorrow = now + datetime.timedelta(1) + + # GFSSource (specify source date/time and forecast) + print("GFSSource node (parameter, level, date, hour)") + gfs_soim = GFSSource( + parameter=parameter, + level=level, + date=yesterday.strftime("%Y%m%d"), + hour="1200", + forecast="003", + cache_ctrl=cache_ctrl, + anon=True, + ) + + o = gfs_soim.eval(gfs_soim.coordinates) + print(o) + + # GFS (specify source date/time, select forecast at evaluation) + print("GFS node (parameter, level, date, hour)") + gfs_soim = GFS( + parameter=parameter, + level=level, + date=yesterday.strftime("%Y%m%d"), + hour="1200", + cache_ctrl=cache_ctrl, + anon=True, + ) + + # whole world forecast at this time tomorrow + c = Coordinates([gfs_soim.coordinates["lat"], gfs_soim.coordinates["lon"], tomorrow], dims=["lat", "lon", "time"]) + o = gfs_soim.eval(c) + print(o) + + # time series: get the forecast at lat=42, lon=275 every hour for the next 6 hours + start = now + stop = now + datetime.timedelta(hours=6) + c = Coordinates([42, 282, podpac.crange(start, stop, "1,h")], dims=["lat", "lon", "time"]) + o = gfs_soim.eval(c) + print(o) + + # latest (get latest source, select forecast at evaluation) + print("GFSLatest node (parameter, level)") + gfs_soim = GFSLatest(parameter=parameter, level=level, cache_ctrl=cache_ctrl, anon=True) + c = 
Coordinates([gfs_soim.coordinates["lat"], gfs_soim.coordinates["lon"], tomorrow], dims=["lat", "lon", "time"]) + o = gfs_soim.eval(c) + print(o) diff --git a/podpac/datalib/intake.py b/podpac/datalib/intake_catalog.py similarity index 83% rename from podpac/datalib/intake.py rename to podpac/datalib/intake_catalog.py index 0be222f4b..6d6d54155 100644 --- a/podpac/datalib/intake.py +++ b/podpac/datalib/intake_catalog.py @@ -12,6 +12,7 @@ # Internal imports import podpac from podpac import Coordinates +from podpac.utils import cached_property intake = lazy_module("intake") # lazy_module('intake.catalog.local.LocalCatalogEntry') @@ -33,7 +34,7 @@ class IntakeCatalog(podpac.data.DataSource): If source is a dataframe with multiple fields, this specifies the field to use for analysis.for Can be defined in the metadata in the intake catalog source. dims : dict, optional - Dictionary defining the native coordinates dimensions in the intake catalog source. + Dictionary defining the coordinates dimensions in the intake catalog source. Keys are the podpac dimensions (lat, lon, time, alt) in stacked or unstacked form. Values are the identifiers which locate the coordinates in the datasource. Can be defined in the metadata in the intake catalog source. @@ -42,7 +43,7 @@ class IntakeCatalog(podpac.data.DataSource): {'lat_lon': ['lat column', 'lon column']} {'time': 'time'} crs : str, optional - Coordinate reference system of the native coordinates. + Coordinate reference system of the coordinates. Can be defined in the metadata in the intake catalog source. @@ -57,7 +58,7 @@ class IntakeCatalog(podpac.data.DataSource): """ # input parameters - source = tl.Unicode().tag(readonly=True) + source = tl.Unicode().tag(attr=True) uri = tl.Unicode() # optional input parameters @@ -65,18 +66,12 @@ class IntakeCatalog(podpac.data.DataSource): dims = tl.Dict(default_value=None, allow_none=True) crs = tl.Unicode(default_value=None, allow_none=True) - # attributes - catalog = tl.Any() # This should be lazy-loaded, but haven't problems with that currently - # tl.Instance(intake.catalog.Catalog) - datasource = tl.Any() # Same as above - # datasource = tl.Instance(intake.catalog.local.LocalCatalogEntry) - - @tl.default("catalog") - def _default_catalog(self): + @cached_property + def catalog(self): return intake.open_catalog(self.uri) - @tl.default("datasource") - def _default_datasource(self): + @cached_property + def datasource(self): return getattr(self.catalog, self.source) # TODO: validators may not be necessary @@ -121,8 +116,8 @@ def _validate_dims(self, proposed): return dims - def get_native_coordinates(self): - """Get native coordinates from catalog definition or input dims + def get_coordinates(self): + """Get coordinates from catalog definition or input dims """ # look for dims in catalog @@ -130,7 +125,7 @@ def get_native_coordinates(self): if "dims" in self.datasource.metadata: self.dims = self.datasource.metadata["dims"] else: - raise ValueError("No native coordinates dims defined in catalog or input") + raise ValueError("No coordinates dims defined in catalog or input") # look for crs in catalog if self.crs is None: @@ -179,3 +174,26 @@ def get_data(self, coordinates, coordinates_index): # create UnitDataArray with subselected data (idx) uda = self.create_output_array(coordinates, data=data[coordinates_index]) return uda + + +if __name__ == "__main__": + node = IntakeCatalog( + uri="../podpac-examples/notebooks/demos/intake/precip/catalog.yml", # path to catalog + source="southern_rockies", # name of the 
source within catalog + field="precip", # this can be defined in catalog source metadata + dims={"time": "time"}, # this can be defined in catalog source metadata + ) + + print("catalog") + print(node.catalog) + + print("datasource") + print(node.datasource) + + print("coordinates") + print(node.coordinates) + + print("eval") + print(node.eval(node.coordinates)) + + print("done") diff --git a/podpac/datalib/modis-pds.py b/podpac/datalib/modis-pds.py new file mode 100644 index 000000000..12635db81 --- /dev/null +++ b/podpac/datalib/modis-pds.py @@ -0,0 +1,364 @@ +""" +MODIS on AWS OpenData + +MODIS Coordinates Grids: https://modis-land.gsfc.nasa.gov/MODLAND_grid.html +""" + +import logging +import datetime + +import numpy as np +import traitlets as tl + +import podpac +from podpac.utils import cached_property +from podpac.compositor import UniformTileCompositor, UniformTileMixin, OrderedCompositor +from podpac.data import Rasterio, DataSource +from podpac.authentication import S3Mixin + +_logger = logging.getLogger(__name__) + +BUCKET = "modis-pds" +PRODUCTS = ["MCD43A4.006", "MOD09GA.006", "MYD09GA.006", "MOD09GQ.006", "MYD09GQ.006"] +CRS = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs +type=crs" + +SINUSOIDAL_HORIZONTAL = { + "00": (-20014877.697641734, -18903390.490691263), + "01": (-18902927.177974734, -17791439.971025266), + "02": (-17790976.658308737, -16679489.451358264), + "03": (-16679026.138641736, -15567538.931691263), + "04": (-15567075.618974736, -14455588.412025262), + "05": (-14455125.099308735, -13343637.892358262), + "06": (-13343174.579641735, -12231687.372691263), + "07": (-12231224.059974736, -11119736.853025263), + "08": (-11119273.540308736, -10007786.333358264), + "09": (-10007323.020641735, -8895835.813691262), + "10": (-8895372.500974735, -7783885.294025263), + "11": (-7783421.981308736, -6671934.774358263), + "12": (-6671471.461641735, -5559984.254691264), + "13": (-5559520.941974737, -4448033.735025264), + "14": (-4447570.422308736, -3336083.215358263), + "15": (-3335619.902641736, -2224132.695691264), + "16": (-2223669.382974736, -1112182.176025264), + "17": (-1111718.863308736, -231.656358264), + "18": (231.656358264, 1111718.863308736), + "19": (1112182.176025264, 2223669.382974736), + "20": (2224132.695691264, 3335619.902641736), + "21": (3336083.215358264, 4447570.422308736), + "22": (4448033.735025263, 5559520.941974737), + "23": (5559984.254691265, 6671471.461641736), + "24": (6671934.774358264, 7783421.981308737), + "25": (7783885.294025264, 8895372.500974735), + "26": (8895835.813691264, 10007323.020641737), + "27": (10007786.333358264, 11119273.540308736), + "28": (11119736.853025265, 12231224.059974737), + "29": (12231687.372691264, 13343174.579641737), + "30": (13343637.892358264, 14455125.099308737), + "31": (14455588.412025264, 15567075.618974738), + "32": (15567538.931691265, 16679026.138641737), + "33": (16679489.451358264, 17790976.658308737), + "34": (17791439.971025266, 18902927.177974734), + "35": (18903390.490691263, 20014877.697641734), +} + +SINUSOIDAL_VERTICAL = { + "00": (10007323.020641735, 8895835.813691262), + "01": (8895372.500974735, 7783885.294025263), + "02": (7783421.981308736, 6671934.774358263), + "03": (6671471.461641735, 5559984.254691264), + "04": (5559520.941974737, 4448033.735025264), + "05": (4447570.422308736, 3336083.215358263), + "06": (3335619.902641736, 2224132.695691264), + "07": (2223669.382974736, 1112182.176025264), + "08": (1111718.863308736, 231.656358264), + "09": (-231.656358264, 
-1111718.863308736), + "10": (-1112182.176025264, -2223669.382974736), + "11": (-2224132.695691264, -3335619.902641736), + "12": (-3336083.215358264, -4447570.422308736), + "13": (-4448033.735025263, -5559520.941974737), + "14": (-5559984.254691265, -6671471.461641736), + "15": (-6671934.774358264, -7783421.981308737), + "16": (-7783885.294025264, -8895372.500974735), + "17": (-8895835.813691264, -10007323.020641737), +} + + +def _parse_modis_date(date): + return datetime.datetime.strptime(date, "%Y%j").strftime("%Y-%m-%d") + + +def _available(s3, *l): + prefix = "/".join([BUCKET] + list(l)) + return [obj.replace(prefix + "/", "") for obj in s3.ls(prefix) if "_scenes.txt" not in obj] + + +def get_tile_coordinates(h, v): + """ use pre-fetched lat and lon bounds to get coordinates for a single tile """ + lat_start, lat_stop = SINUSOIDAL_VERTICAL[v] + lon_start, lon_stop = SINUSOIDAL_HORIZONTAL[h] + lat = podpac.clinspace(lat_start, lat_stop, 2400, name="lat") + lon = podpac.clinspace(lon_start, lon_stop, 2400, name="lon") + return podpac.Coordinates([lat, lon], crs=CRS) + + +class MODISSource(Rasterio): + """ + Individual MODIS data tile using AWS OpenData, with caching. + + Attributes + ---------- + product : str + MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006') + horizontal : str + column in the MODIS Sinusoidal Tiling System, e.g. '21' + vertical : str + row in the MODIS Sinusoidal Tiling System, e.g. '07' + date : str + year and three-digit day of year, e.g. '2011260' + data : str + individual object (varies by product) + """ + + product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True) + horizontal = tl.Unicode(help="column in the MODIS Sinusoidal Tiling System, e.g. '21'").tag(attr=True) + vertical = tl.Unicode(help="row in the MODIS Sinusoidal Tiling System, e.g. '07'").tag(attr=True) + date = tl.Unicode(help="year and three-digit day of year, e.g. '2011460'").tag(attr=True) + data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True) + + check_exists = tl.Bool(True) + + _repr_keys = ["prefix", "data_key"] + + def init(self): + """ validation """ + for key in ["horizontal", "vertical", "date", "data_key"]: + if not getattr(self, key): + raise ValueError("MODISSource '%s' required" % key) + if self.horizontal not in ["%02d" % h for h in range(36)]: + raise ValueError("MODISSource horizontal invalid ('%s' should be between '00' and '35')" % self.horizontal) + if self.vertical not in ["%02d" % v for v in range(36)]: + raise ValueError("MODISSource vertical invalid ('%s' should be between '00' and '17'" % self.vertical) + try: + _parse_modis_date(self.date) + except ValueError: + raise ValueError("MODISSource date invalid ('%s' should be year and doy, e.g. '2009260'" % self.date) + if self.check_exists and not self.exists: + raise ValueError("No S3 object found at '%s'" % self.source) + + @cached_property(use_cache_ctrl=True) + def filename(self): + _logger.info( + "Looking up source filename (product=%s, h=%s, v=%s, date=%s, data_key=%s)..." 
+ % (self.product, self.horizontal, self.vertical, self.date, self.data_key) + ) + prefix = "/".join([BUCKET, self.product, self.horizontal, self.vertical, self.date]) + objs = [obj.replace(prefix + "/", "") for obj in self.s3.ls(prefix) if obj.endswith("%s.TIF" % self.data_key)] + if len(objs) == 0: + raise RuntimeError("No matches found for data_key='%s' at '%s'" % (self.data_key, prefix)) + if len(objs) > 1: + raise RuntimeError("Too many matches for data_key='%s' at '%s' (%s)" % (self.data_key, prefix, objs)) + return objs[0] + + @property + def prefix(self): + return "%s/%s/%s/%s" % (self.product, self.horizontal, self.vertical, self.date) + + @cached_property + def source(self): + return "s3://%s/%s/%s" % (BUCKET, self.prefix, self.filename) + + @cached_property + def exists(self): + return self.s3.exists(self.source) + + def get_coordinates(self): + # use pre-fetched coordinate bounds (instead of loading from the dataset) + return get_tile_coordinates(self.horizontal, self.vertical) + + +class MODISTile(S3Mixin, DataSource): + product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True) + horizontal = tl.Unicode(help="column in the MODIS Sinusoidal Tiling System, e.g. '21'").tag(attr=True) + vertical = tl.Unicode(help="row in the MODIS Sinusoidal Tiling System, e.g. '07'").tag(attr=True) + data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True) + + _repr_keys = ["product", "data_key"] + + @cached_property + def sources(self): + return [self._make_source(date) for date in self.available_dates] + + @cached_property(use_cache_ctrl=True) # TODO expiration + def available_dates(self): + _logger.info( + "Looking up available dates (product=%s, h=%s, v=%s)..." % (self.product, self.horizontal, self.vertical) + ) + return _available(self.s3, self.product, self.horizontal, self.vertical) + + @cached_property + def tile_coordinates(self): + return get_tile_coordinates(self.horizontal, self.vertical) + + def get_coordinates(self): + # lookup available dates and use pre-fetched lat and lon bounds + time = podpac.Coordinates([[_parse_modis_date(date) for date in self.available_dates]], dims=["time"], crs=CRS) + return podpac.coordinates.merge_dims([time, self.tile_coordinates]) + + def get_data(self, coordinates, coordinates_index): + data = self.create_output_array(coordinates) + for i, source in enumerate(self.sources[coordinates_index[0]]): + data[i, :, :] = source.eval(coordinates.drop("time")) + return data + + def _make_source(self, date): + return MODISSource( + product=self.product, + horizontal=self.horizontal, + vertical=self.vertical, + date=date, + data_key=self.data_key, + check_exists=False, + cache_ctrl=self.cache_ctrl, + force_eval=self.force_eval, + cache_output=self.cache_output, + cache_dataset=True, + s3=self.s3, + ) + + +class MODIS(S3Mixin, OrderedCompositor): + """ MODIS whole-world compositor. 
+ + Attributes + ---------- + product : str + MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006') + data : str + individual object (varies by product) + """ + + product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True) + data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True) + + tile_width = (1, 2400, 2400) + start_date = "2013-01-01" + end_date = datetime.date.today().strftime("%Y-%m-%d") + + _repr_keys = ["product", "data_key"] + + @cached_property + def sources(self): + return [self._make_tile(h, v) for h, v in self.available_tiles] + + @cached_property(use_cache_ctrl=True) + def available_tiles(self): + _logger.info("Looking up available tiles...") + return [(h, v) for h in _available(self.s3, self.product) for v in _available(self.s3, self.product, h)] + + def select_sources(self, coordinates): + """ 2d select sources filtering """ + + sources = super(MODIS, self).select_sources(coordinates) + + # filter tiles spatially + ct = coordinates.transform(CRS) + return [source for source in sources if ct.select(source.tile_coordinates.bounds).size > 0] + + def _make_tile(self, horizontal, vertical): + return MODISTile( + product=self.product, + horizontal=horizontal, + vertical=vertical, + data_key=self.data_key, + cache_ctrl=self.cache_ctrl, + force_eval=self.force_eval, + cache_output=self.cache_output, + cache_dataset=True, + s3=self.s3, + ) + + +if __name__ == "__main__": + from matplotlib import pyplot + + # ------------------------------------------------------------------------- + # basic modis source + # ------------------------------------------------------------------------- + + source = MODISSource( + product=PRODUCTS[0], + data_key="B01", + horizontal="01", + vertical="11", + date="2020009", + cache_ctrl=["disk"], + cache_dataset=True, + cache_output=False, + ) + + print("source: %s" % repr(source)) + print("path: %s" % source.source) + print("coordinates: %s", source.coordinates) + + # native coordinates + o1 = source.eval(source.coordinates) + + # cropped and resampled using EPSG:4326 coordinates + c = podpac.Coordinates([podpac.clinspace(-22, -20, 200), podpac.clinspace(-176, -174, 200)], dims=["lat", "lon"]) + o2 = source.eval(c) + + # ------------------------------------------------------------------------- + # modis tile with time + # ------------------------------------------------------------------------- + + tile = MODISTile( + product=PRODUCTS[0], data_key="B01", horizontal="01", vertical="11", cache_ctrl=["disk"], cache_output=False + ) + + print("tile: %s" % repr(tile)) + print( + "available dates: %s-%s (n=%d)" % (tile.available_dates[0], tile.available_dates[-1], len(tile.available_dates)) + ) + print("coordinates: %s" % tile.coordinates) + + # existing date + assert "2020009" in tile.available_dates + ct1 = podpac.Coordinates(["2020-01-09", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) + o2 = tile.eval(ct1) + + # nearest date + assert "2020087" not in tile.available_dates + ct2 = podpac.Coordinates(["2020-03-27", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) + o3 = tile.eval(ct2) + + # time-series + ct3 = podpac.Coordinates([["2019-01-01", "2019-02-01", "2019-03-01"], -21.45, -174.92], dims=["time", "lat", "lon"]) + o4 = tile.eval(ct3) + + # ------------------------------------------------------------------------- + # modis compositor + # ------------------------------------------------------------------------- + + node = MODIS(product=PRODUCTS[0], data_key="B01", 
cache_ctrl=["disk"], cache_output=False) + + print("node: %s" % repr(node)) + print("sources: n=%d" % len(node.sources)) + print(" .e.g: %s" % repr(node.sources[0])) + + # single tile + assert len(node.select_sources(ct2)) == 1 + o5 = node.eval(ct2) + + # time-series in a single tile + assert len(node.select_sources(ct3)) == 1 + o6 = node.eval(ct3) + + # multiple tiles + ct3 = podpac.Coordinates( + ["2020-01-09", podpac.clinspace(45, 55, 200), podpac.clinspace(-80, -40, 200)], dims=["time", "lat", "lon"] + ) + assert len(node.select_sources(ct3)) == 7 + o7 = node.eval(ct3) + + # o7.plot() + # pyplot.show() diff --git a/podpac/datalib/nasaCMR.py b/podpac/datalib/nasaCMR.py index e01532ae1..8523273c1 100644 --- a/podpac/datalib/nasaCMR.py +++ b/podpac/datalib/nasaCMR.py @@ -3,39 +3,30 @@ """ from __future__ import division, unicode_literals, print_function, absolute_import -import warnings -from copy import deepcopy -from collections import OrderedDict -from six import string_types - -import requests -import os import json import logging +import requests +import numpy as np + _logger = logging.getLogger(__name__) - -import numpy as np -import traitlets as tl +from podpac.core.utils import _get_from_url CMR_URL = r"https://cmr.earthdata.nasa.gov/search/" -def get_collection_id(auth_session=None, short_name=None, keyword=None, **kwargs): +def get_collection_id(session=None, short_name=None, keyword=None, **kwargs): """ Users NASA CMR to retrieve metadata about a data collection Parameters ----------- - auth_session: podpac.core.authentication.Session, optional + session: :class:`requets.Session`, optional An authenticated Earthdata login session - short_name: str, optional The short name of the dataset - keyword: str, optional Any keyword search parameters - **kwargs: str, optional Any additional query parameters @@ -59,12 +50,11 @@ def get_collection_id(auth_session=None, short_name=None, keyword=None, **kwargs query_string = "&".join([k + "=" + v for k, v in kwargs.items()]) - if auth_session is None: - auth_session = requests - - json_ = _get_from_url(auth_session, base_url + query_string) + # use generic requests session if `session` is not defined + if session is None: + session = requests - pydict = json.loads(json_) + pydict = _get_from_url(base_url + query_string, session).json() entries = pydict["feed"]["entry"] if len(entries) > 1: @@ -75,17 +65,15 @@ def get_collection_id(auth_session=None, short_name=None, keyword=None, **kwargs return collection_id -def search_granule_json(auth_session=None, entry_map=None, **kwargs): +def search_granule_json(session=None, entry_map=None, **kwargs): """ Search for specific files from NASA CMR for a particular collection Parameters ----------- - auth_session: podpac.core.authentication.Session, optional + session: :class:`requets.Session`, optional An authenticated Earthdata login session - entry_map: function A function applied to each individual entry. Could be used to filter out certain data in an entry - **kwargs: dict Additional query string parameters. At minimum the provider, provider_id, concept_id, collection_concept_id, short_name, version, or entry_title @@ -98,7 +86,20 @@ def search_granule_json(auth_session=None, entry_map=None, **kwargs): """ base_url = CMR_URL + "granules.json?" 
- if "collection_id" not in kwargs and "short_name" not in kwargs: + if not np.any( + [ + m not in kwargs + for m in [ + "provider", + "provider_id", + "concept_id", + "collection_concept_id", + "short_name", + "version", + "entry_title", + ] + ] + ): raise ValueError( "Need to provide either" " provider, provider_id, concept_id, collection_concept_id, short_name, version or entry_title" @@ -113,27 +114,26 @@ def search_granule_json(auth_session=None, entry_map=None, **kwargs): query_string = "&".join([k + "=" + str(v) for k, v in kwargs.items()]) - if auth_session is None: - auth_session = requests + if session is None: + session = requests url = base_url + query_string if "page_num" not in kwargs: - entries = _get_all_granule_pages(auth_session, url, entry_map) + entries = _get_all_granule_pages(session, url, entry_map) else: - granules = _get_from_url(auth_session, url) - pydict = json.loads(granules) + pydict = _get_from_url(url, session).json() entries = list(map(entry_map, pydict["feed"]["entry"])) return entries -def _get_all_granule_pages(auth_session, url, entry_map, max_paging_depth=1000000): +def _get_all_granule_pages(session, url, entry_map, max_paging_depth=1000000): """ Helper function for searching through all pages for a collection. Parameters ----------- - auth_session: podpac.core.authentication.EarthDataSession - Authenticated EDS session + session: :class:`requets.Session`, optional + An authenticated Earthdata login session url: str URL to website entry_map: function @@ -143,40 +143,13 @@ def _get_all_granule_pages(auth_session, url, entry_map, max_paging_depth=100000 page_size = int([q for q in url.split("?")[1].split("&") if "page_size" in q][0].split("=")[1]) max_pages = int(max_paging_depth / page_size) - pydict = json.loads(_get_from_url(auth_session, url)) + pydict = _get_from_url(url, session).json() entries = list(map(entry_map, pydict["feed"]["entry"])) for i in range(1, max_pages): page_url = url + "&page_num=%d" % (i + 1) - page_entries = json.loads(_get_from_url(auth_session, page_url))["feed"]["entry"] + page_entries = _get_from_url(page_url, session).json()["feed"]["entry"] if not page_entries: break entries.extend(list(map(entry_map, page_entries))) return entries - - -def _get_from_url(auth_session, url): - """Helper function to get data from an url with error checking. - - Parameters - ----------- - auth_session: podpac.core.authentication.EarthDataSession - Authenticated EDS session - url: str - URL to website - """ - try: - r = auth_session.get(url) - if r.status_code != 200: - _logger.warning( - "Could not connect to {}, status code {}. \n *** Return Text *** \n {} \n *** End Return Text ***".format( - url, r.status_code, r.text - ) - ) - - except requests.ConnectionError as e: - _logger.warning("Cannot connect to {}:".format(url) + str(e)) - r = None - except RuntimeError as e: - _logger.warning("Cannot authenticate to {}. Check credentials. 
Error was as follows:".format(url) + str(e)) - return r.text diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index aa0940d4c..202312e84 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -30,10 +30,11 @@ _logger = logging.getLogger(__name__) # Helper utility for optional imports -from lazy_import import lazy_module +from lazy_import import lazy_module, lazy_class # Optional dependencies bs4 = lazy_module("bs4") +BeautifulSoup = lazy_class("bs4.BeautifulSoup") boto3 = lazy_module("boto3") # fixing problem with older versions of numpy @@ -46,51 +47,36 @@ def isnat(a): # Internal dependencies import podpac -from podpac.core.coordinates import Coordinates, union, merge_dims, concat -from podpac.core.data import pydap_source -from podpac.core import authentication -from podpac.core.utils import common_doc +from podpac import NodeException +from podpac import authentication +from podpac.coordinates import Coordinates, merge_dims +from podpac.data import PyDAP +from podpac.utils import cached_property, DiskCacheMixin +from podpac.compositor import OrderedCompositor from podpac.core.data.datasource import COMMON_DATA_DOC -from podpac.core.node import cache_func -from podpac.core.node import NodeException -from podpac.core import cache +from podpac.core.utils import common_doc, _get_from_url -from . import nasaCMR +from podpac.datalib import nasaCMR COMMON_DOC = COMMON_DATA_DOC.copy() COMMON_DOC.update( { "smap_date": "str\n SMAP date string", "np_date": "np.datetime64\n Numpy date object", - "auth_class": ( - "EarthDataSession (Class object)\n Class used to make an authenticated session from a" - " username and password (both are defined in base class)" - ), - "auth_session": ( - "Instance(EarthDataSession)\n Authenticated session used to make http requests using" - "NASA Earth Data Login credentials" - ), "base_url": "str\n Url to nsidc openDAP server", - "layerkey": ( + "layer_key": ( "str\n Key used to retrieve data from OpenDAP dataset. This specifies the key used to retrieve " "the data" ), - "password": "User's EarthData password", - "username": "User's EarthData username", "product": "SMAP product name", "version": "Version number for the SMAP product", - "source_coordinates": """Returns the coordinates that uniquely describe each source - - Returns - ------- - :class:`podpac.Coordinates` - Coordinates that uniquely describe each source""", + "source_coordinates": "Coordinates that uniquely describe each source", "keys": """Available layers that are in the OpenDAP dataset Returns ------- List - The list of available keys from the OpenDAP dataset. Any of these keys can be set as self.datakey. + The list of available keys from the OpenDAP dataset. Any of these keys can be set as self.data_key. Notes ----- @@ -139,34 +125,7 @@ def np2smap_date(date): return date -def _get_from_url(url, auth_session): - """Helper function to get data from an NSIDC url with error checking. 
- - Parameters - ----------- - url: str - URL to website - auth_session: podpac.core.authentication.EarthDataSession - Authenticated EDS session - """ - try: - r = auth_session.get(url) - if r.status_code != 200: - _logger.warning("Could not connect to {}, status code {}".format(url, r.status_code)) - _logger.info("Trying to connect to {}".format(url.replace("opendap/", ""))) - r = auth_session.get(url.replace("opendap/", "")) - if r.status_code != 200: - _logger.error("Could not connect to {} to retrieve data, status code {}".format(url, r.status_code)) - raise RuntimeError("HTTP error: <%d>\n" % (r.status_code) + r.text[:4096]) - except requests.ConnectionError as e: - _logger.warning("Cannot connect to {}:".format(url) + str(e)) - r = None - except RuntimeError as e: - _logger.warning("Cannot authenticate to {}. Check credentials. Error was as follows:".format(url) + str(e)) - return r - - -def _infer_SMAP_product_version(product, base_url, auth_session): +def _infer_SMAP_product_version(product, base_url, session): """Helper function to automatically infer the version number of SMAP products in case user did not specify a version, or the version changed @@ -176,20 +135,20 @@ def _infer_SMAP_product_version(product, base_url, auth_session): Name of the SMAP product (e.g. one of SMAP_PRODUCT_DICT.keys()) base_url: str URL to base SMAP product page - auth_session: podpac.core.authentication.EarthDataSession - Authenticated EDS session + session: :class:`requests.Session` + Authenticated EDS session. Generally returned from :class:`SMAPSessionMixin`. """ - r = _get_from_url(base_url, auth_session) + r = _get_from_url(base_url, session=session) if r: m = re.search(product, r.text) return int(r.text[m.end() + 1 : m.end() + 4]) return int(SMAP_PRODUCT_MAP.sel(product=product, attr="default_version").item()) -# NOTE: {rdk} will be substituted for the entry's 'rootdatakey' +# NOTE: {rdk} will be substituted for the entry's 'root_data_key' SMAP_PRODUCT_DICT = { - #'.ver': ['latkey', 'lonkey', 'rootdatakey', 'layerkey' 'default_verison' + #'.ver': ['lat_key', 'lon_key', 'root_data_key', 'layer_key' 'default_verison' "SPL4SMAU": ["cell_lat", "cell_lon", "Analysis_Data_", "{rdk}sm_surface_analysis", 4], "SPL4SMGP": ["cell_lat", "cell_lon", "Geophysical_Data_", "{rdk}sm_surface", 4], "SPL3SMA": ["{rdk}latitude", "{rdk}longitude", "Soil_Moisture_Retrieval_Data_", "{rdk}soil_moisture", 3], @@ -211,7 +170,7 @@ def _infer_SMAP_product_version(product, base_url, auth_session): dims=["product", "attr"], coords={ "product": list(SMAP_PRODUCT_DICT.keys()), - "attr": ["latkey", "lonkey", "rootdatakey", "layerkey", "default_version"], + "attr": ["lat_key", "lon_key", "root_data_key", "layer_key", "default_version"], }, ) @@ -253,136 +212,225 @@ def SMAP_BASE_URL(): return BASE_URL +class SMAPSessionMixin(authentication.RequestsSessionMixin): + """SMAP requests authentication session. + Implements :class:`authentication.RequestsSessionMixin` with hostname specific to SMAP authentication. 
+ Overrides the :meth:`requests.Session.rebuild_auth` method to handle authorization redirect from the Earthdata portal + """ + + hostname = "urs.earthdata.nasa.gov" + auth_required = True + product_url = SMAP_BASE_URL() + + @cached_property + def session(self): + """Requests Session object for making calls to remote `self.hostname` + See https://2.python-requests.org/en/master/api/#sessionapi + + Returns + ------- + :class:`requests.Session` + Requests Session class with `auth` attribute defined + """ + + s = self._create_session() + + # override `rebuild_auth` method + s.rebuild_auth = self._rebuild_auth + + return s + + def _rebuild_auth(self, prepared_request, response): + """ + Overrides from the library to keep headers when redirected to or from + the NASA auth host. + See https://2.python-requests.org/en/master/api/#requests.Session.rebuild_auth + + Parameters + ---------- + prepared_request : :class:`requests.Request` + See https://2.python-requests.org/en/master/api/#requests.Session.rebuild_auth + response : :class:`requests.Response` + See https://2.python-requests.org/en/master/api/#requests.Session.rebuild_auth + + Returns + ------- + None + """ + headers = prepared_request.headers + url = prepared_request.url + + if "Authorization" in headers: + original_parsed = requests.utils.urlparse(response.request.url) + redirect_parsed = requests.utils.urlparse(url) + + # delete the Authorization header if the original and redirect hostnames differ + # and neither is the auth hostname, unless the redirect matches product_url_regex below + if ( + (original_parsed.hostname != redirect_parsed.hostname) + and redirect_parsed.hostname != self.hostname + and original_parsed.hostname != self.hostname + ): + + # parse product_url for hostname + product_url_hostname = requests.utils.urlparse(self.product_url).hostname + + # make all numbers in product_url_hostname wildcards + product_url_regex = ( + re.compile(re.sub(r"\d", r"\\d", product_url_hostname)) + if product_url_hostname is not None + else None + ) + + # if redirect matches product_url_regex, then allow the headers to stay + if product_url_regex is not None and product_url_regex.match(redirect_parsed.hostname): + pass + else: + del headers["Authorization"] + + return + + +class SMAPCompositor(OrderedCompositor): + """SMAP-specific OrderedCompositor with shared coordinates. + + Attributes + ---------- + sources : list + Source nodes, in order of preference. Later sources are only used where earlier sources do not provide data. + source_coordinates : :class:`podpac.Coordinates` + Coordinates that make each source unique. Must be the same size as ``sources`` and single-dimensional. + shared_coordinates : :class:`podpac.Coordinates` + Coordinates that are shared amongst all of the composited sources. + is_source_coordinates_complete : Bool + This flag is used to automatically construct coordinates as an optimization. Default is False. + For example, if the source_coordinates only include the year-month-day of each source, but the actual sources + also have hour-minute-second information, then the source_coordinates are incomplete. + """ + + is_source_coordinates_complete = tl.Bool(False) + shared_coordinates = tl.Instance(Coordinates, allow_none=True, default_value=None) + + def select_sources(self, coordinates): + """Select sources based on requested coordinates, including setting coordinates, if possible. 
+ + Parameters + ---------- + coordinates : :class:`podpac.Coordinates` + Coordinates to evaluate at compositor sources + + Returns + ------- + sources : :class:`np.ndarray` + Array of sources + + Notes + ----- + * If :attr:`source_coordinates` is defined, only sources that intersect the requested coordinates are selected. + * Sets sources :attr:`interpolation`. + * If source coordinates complete, sets sources :attr:`coordinates` as an optimization. + """ + + """ Optimization: . """ + + src_subset = super(SMAPCompositor, self).select_sources(coordinates) + + if self.is_source_coordinates_complete: + coords_subset = list(self.source_coordinates.intersect(coordinates, outer=True).coords.values())[0] + coords_dim = list(self.source_coordinates.dims)[0] + crs = self.source_coordinates.crs + for s, c in zip(src_subset, coords_subset): + nc = merge_dims( + [ + Coordinates(np.atleast_1d(c), dims=[coords_dim], crs=crs, validate_crs=False), + self.shared_coordinates, + ] + ) + s.set_coordinates(nc) + + return src_subset + + @common_doc(COMMON_DOC) -class SMAPSource(pydap_source.PyDAP): +class SMAPSource(SMAPSessionMixin, DiskCacheMixin, PyDAP): """Accesses SMAP data given a specific openDAP URL. This is the base class giving access to SMAP data, and knows how to extract the correct coordinates and data keys for the soil moisture data. Attributes ---------- - auth_class : {auth_class} - auth_session : {auth_session} date_file_url_re : SRE_Pattern Regular expression used to retrieve date from self.source (OpenDAP Url) date_time_file_url_re : SRE_Pattern Regular expression used to retrieve date and time from self.source (OpenDAP Url) - layerkey : str + layer_key : str Key used to retrieve data from OpenDAP dataset. This specifies the key used to retrieve the data nan_vals : list List of values that should be treated as no-data (these are replaced by np.nan) - rootdatakey : str + root_data_key : str String the prepends every or most keys for data in the OpenDAP dataset """ - auth_session = tl.Instance(authentication.EarthDataSession) - auth_class = tl.Type(authentication.EarthDataSession) - # Need to overwrite parent because of recursive definition - outputs = None - - @tl.default("auth_session") - def _auth_session_default(self): - session = self.auth_class(username=self.username, password=self.password, product_url=SMAP_BASE_URL()) - - # check url - try: - session.get(SMAP_BASE_URL()) - except Exception as e: - _logger.warning("Unknown exception: ", e) - return session + layer_key = tl.Unicode().tag(attr=True) + root_data_key = tl.Unicode().tag(attr=True) + nan_vals = [-9999.0] + cache_coordinates = tl.Bool(True) # date_url_re = re.compile('[0-9]{4}\.[0-9]{2}\.[0-9]{2}') date_time_file_url_re = re.compile("[0-9]{8}T[0-9]{6}") date_file_url_re = re.compile("[0-9]{8}") - rootdatakey = tl.Unicode() - - @tl.default("rootdatakey") + @tl.default("root_data_key") def _rootdatakey_default(self): - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="rootdatakey").item() - - layerkey = tl.Unicode() + return SMAP_PRODUCT_MAP.sel(product=self.product, attr="root_data_key").item() - @tl.default("layerkey") + @tl.default("layer_key") def _layerkey_default(self): - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="layerkey").item() - - nan_vals = [-9999.0] + return SMAP_PRODUCT_MAP.sel(product=self.product, attr="layer_key").item() @property def product(self): - """Returns the SMAP product from the OpenDAP Url + """SMAP product from the OpenDAP URL""" - Returns - ------- - str - {product} - """ src = 
self.source.split("/") return src[src.index("SMAP") + 1].split(".")[0] @property def version(self): - """Returns the SMAP product version from the OpenDAP Url - - Returns - ------- - int - {version} + """SMAP product version from the OpenDAP URL """ src = self.source.split("/") return int(src[src.index("SMAP") + 1].split(".")[1]) - @tl.default("datakey") - def _datakey_default(self): - return self.layerkey.format(rdk=self.rootdatakey) + @property + def data_key(self): + """PyDAP data_key, constructed from the layer_key and root_data_key""" + + return self.layer_key.format(rdk=self.root_data_key) @property - def latkey(self): - """The key used to retrieve the latitude + def lat_key(self): + """OpenDap dataset key for latitude. """ - Returns - ------- - str - OpenDap dataset key for latitude - """ - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="latkey").item().format(rdk=self.rootdatakey) + return SMAP_PRODUCT_MAP.sel(product=self.product, attr="lat_key").item().format(rdk=self.root_data_key) @property - def lonkey(self): - """The key used to retrieve the latitude + def lon_key(self): + """OpenDap dataset key for longitude. """ - Returns - ------- - str - OpenDap dataset key for longitude - """ - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="lonkey").item().format(rdk=self.rootdatakey) + return SMAP_PRODUCT_MAP.sel(product=self.product, attr="lon_key").item().format(rdk=self.root_data_key) - @common_doc(COMMON_DOC) - @cache_func("native.coordinates") - def get_native_coordinates(self): - """{get_native_coordinates} - """ - times = self.get_available_times() - ds = self.dataset - lons = np.array(ds[self.lonkey][:, :]) - lats = np.array(ds[self.latkey][:, :]) - lons[lons == self.nan_vals[0]] = np.nan - lats[lats == self.nan_vals[0]] = np.nan - lons = np.nanmean(lons, axis=0) - lats = np.nanmean(lats, axis=1) - coords = podpac.Coordinates([times, lats, lons], dims=["time", "lat", "lon"]) - return coords + @cached_property + def available_times(self): + """Retrieve the available times from the SMAP file. - def get_available_times(self): - """Retrieve the available times from the SMAP file. This is primarily based on the filename, but some products - have multiple times stored in a single file. + This is primarily based on the filename, but some products have multiple times stored in a single file. 
Returns ------- np.ndarray(dtype=np.datetime64) Available times in the SMAP source """ + m = self.date_time_file_url_re.search(self.source) if not m: m = self.date_file_url_re.search(self.source) @@ -392,6 +440,19 @@ def get_available_times(self): times = times + np.array([6, 18], "timedelta64[h]") return times + @common_doc(COMMON_DOC) + def get_coordinates(self): + """{get_coordinates} + """ + lons = np.array(self.dataset[self.lon_key][:, :]) + lats = np.array(self.dataset[self.lat_key][:, :]) + lons[lons == self.nan_vals[0]] = np.nan + lats[lats == self.nan_vals[0]] = np.nan + lons = np.nanmean(lons, axis=0) + lats = np.nanmean(lats, axis=1) + coords = Coordinates([self.available_times, lats, lons], dims=["time", "lat", "lon"]) + return coords + @common_doc(COMMON_DOC) def get_data(self, coordinates, coordinates_index): """{get_data} @@ -400,23 +461,23 @@ def get_data(self, coordinates, coordinates_index): s = tuple([slc for d, slc in zip(coordinates.dims, coordinates_index) if "time" not in d]) if "SM_P_" in self.source: d = self.create_output_array(coordinates) - am_key = self.layerkey.format(rdk=self.rootdatakey + "AM") - pm_key = self.layerkey.format(rdk=self.rootdatakey + "PM") + "_pm" + am_key = self.layer_key.format(rdk=self.root_data_key + "AM") + pm_key = self.layer_key.format(rdk=self.root_data_key + "PM") + "_pm" try: - t = self.native_coordinates.coords["time"][0] + t = self.coordinates.coords["time"][0] d.loc[dict(time=t)] = np.array(self.dataset[am_key][s]) except: pass try: - t = self.native_coordinates.coords["time"][1] + t = self.coordinates.coords["time"][1] d.loc[dict(time=t)] = np.array(self.dataset[pm_key][s]) except: pass else: - data = np.array(self.dataset[self.datakey][s]) + data = np.array(self.dataset[self.data_key][s]) d = self.create_output_array(coordinates, data=data.reshape(coordinates.shape)) return d @@ -445,15 +506,14 @@ class SMAPProperties(SMAPSource): 'SMAP_L4_SM_lmc_00000000T000000_Vv{latest_version}.h5') """ + source = tl.Unicode().tag(attr=True) file_url_re = re.compile(r"SMAP.*_[0-9]{8}T[0-9]{6}_.*\.h5") - source = tl.Unicode().tag(readonly=True) - @tl.default("source") def _property_source_default(self): - v = _infer_SMAP_product_version("SPL4SMLM", SMAP_BASE_URL(), self.auth_session) + v = _infer_SMAP_product_version("SPL4SMLM", SMAP_BASE_URL(), self.session) url = SMAP_BASE_URL() + "/SPL4SMLM.%03d/2015.03.31/" % (v) - r = _get_from_url(url, self.auth_session) + r = _get_from_url(url, session=self.session) if not r: return "None" n = self.file_url_re.search(r.text).group() @@ -498,23 +558,21 @@ def _property_source_default(self): ] ).tag(attr=True) - @tl.default("layerkey") + @tl.default("layer_key") def _layerkey_default(self): return "{rdk}" + self.property @common_doc(COMMON_DOC) - @cache_func("native.coordinates") - def get_native_coordinates(self): - """{get_native_coordinates} + def get_coordinates(self): + """{get_coordinates} """ - ds = self.dataset - lons = np.array(ds[self.lonkey][:, :]) - lats = np.array(ds[self.latkey][:, :]) + lons = np.array(self.dataset[self.lon_key][:, :]) + lats = np.array(self.dataset[self.lat_key][:, :]) lons[lons == self.nan_vals[0]] = np.nan lats[lats == self.nan_vals[0]] = np.nan lons = np.nanmean(lons, axis=0) lats = np.nanmean(lats, axis=1) - coords = podpac.Coordinates([lats, lons], dims=["lat", "lon"]) + coords = Coordinates([lats, lons], dims=["lat", "lon"]) return coords @@ -543,16 +601,12 @@ class SMAPWilt(SMAPProperties): @common_doc(COMMON_DOC) -class 
SMAPDateFolder(podpac.compositor.OrderedCompositor): +class SMAPDateFolder(SMAPSessionMixin, DiskCacheMixin, SMAPCompositor): """Compositor of all the SMAP source urls present in a particular folder which is defined for a particular date Attributes ---------- - auth_class : {auth_class} - auth_session : {auth_session} base_url : {base_url} - cache_native_coordinates : bool, optional - Default is False. If True, the native_coordinates will be cached to disk after being computed the first time date_time_url_re : SRE_Pattern Regular expression used to retrieve the date and time from the filename if file_url_re matches date_url_re : SRE_Pattern @@ -568,54 +622,19 @@ class SMAPDateFolder(podpac.compositor.OrderedCompositor): tile cover? latlon_url_re : SRE_Pattern Regular expression used to find the lat-lon coordinates associated with the file from the file name - layerkey : {layerkey} - password : {password} + layer_key : {layer_key} product : str {product} version : int {version} - username : {username} """ - auth_session = tl.Instance(authentication.EarthDataSession) - auth_class = tl.Type(authentication.EarthDataSession) - username = tl.Unicode(None, allow_none=True) - password = tl.Unicode(None, allow_none=True) - # Need to overwrite parent because of recursive definition - outputs = None - - @tl.validate("source_coordinates") - def _validate_source_coordinates(self, d): - # Need to overwrite parent because of recursive definition - return d["value"] - - @tl.default("cache_ctrl") - def _cache_ctrl_default(self): - # append disk store to default cache_ctrl if not present - default_ctrl = cache.get_default_cache_ctrl() - stores = default_ctrl._cache_stores - if not any(isinstance(store, cache.DiskCacheStore) for store in default_ctrl._cache_stores): - stores.append(cache.DiskCacheStore()) - return cache.CacheCtrl(stores) - - @tl.default("auth_session") - def _auth_session_default(self): - return self.auth_class(username=self.username, password=self.password, product_url=SMAP_BASE_URL()) - base_url = tl.Unicode().tag(attr=True) - - @tl.default("base_url") - def _base_url_default(self): - return SMAP_BASE_URL() - product = tl.Enum(SMAP_PRODUCT_MAP.coords["product"].data.tolist()).tag(attr=True) version = tl.Int(allow_none=True).tag(attr=True) - - @tl.default("version") - def _detect_product_version(self): - return _infer_SMAP_product_version(self.product, self.base_url, self.auth_session) - folder_date = tl.Unicode("").tag(attr=True) + layer_key = tl.Unicode().tag(attr=True) + latlon_delta = tl.Float(default_value=1.5).tag(attr=True) file_url_re = re.compile(r".*_[0-9]{8}T[0-9]{6}_.*\.h5") file_url_re2 = re.compile(r".*_[0-9]{8}_.*\.h5") @@ -623,30 +642,32 @@ def _detect_product_version(self): date_url_re = re.compile(r"[0-9]{8}") latlon_url_re = re.compile(r"[0-9]{3}[E,W][0-9]{2}[N,S]") - latlon_delta = tl.Float(default_value=1.5).tag(attr=True) + # list of attribute names, used by __repr__ and __str__ to display minimal info about the node + _repr_keys = ["product", "folder_date"] - cache_native_coordinates = tl.Bool(False) + @tl.default("base_url") + def _base_url_default(self): + return SMAP_BASE_URL() - layerkey = tl.Unicode() + @tl.default("version") + def _detect_product_version(self): + return _infer_SMAP_product_version(self.product, self.base_url, self.session) - @tl.default("layerkey") + @tl.default("layer_key") def _layerkey_default(self): - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="layerkey").item() + return SMAP_PRODUCT_MAP.sel(product=self.product, 
attr="layer_key").item() - @tl.observe("layerkey") - def _layerkey_change(self, change): - if change["old"] != change["new"] and change["old"] != "": - for s in self.sources: - s.layerkey = change["new"] + @tl.default("shared_coordinates") + def _default_shared_coordinates(self): + """Coordinates that are shared by all files in the folder.""" - def __repr__(self): - rep = "{}".format("SMAP") - rep += "\n\tproduct: {}".format(self.product) - - return rep + if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: + return None + coords = copy.deepcopy(self.sources[0].coordinates) + return coords.drop("time") - @property - def source(self): + @cached_property + def folder_url(self): """URL to OpenDAP dataset folder Returns @@ -656,20 +677,14 @@ def source(self): """ return "/".join([self.base_url, "%s.%03d" % (self.product, self.version), self.folder_date]) - @tl.default("sources") - def sources_default(self): - """SMAPSource objects pointing to URLs of specific SMAP files in the folder + @cached_property + def sources(self): + """SMAPSource objects pointing to URLs of specific SMAP files in the folder""" - Returns - ------- - np.ndarray(dtype=object(SMAPSource)) - Array of SMAPSource instances tied to specific SMAP files - """ # Swapped the try and except blocks. SMAP filenames may change version numbers, which causes cached source to # break. Hence, try to get the new source everytime, unless data is offline, in which case rely on the cache. try: - _, _, sources = self.get_available_coords_sources() - self.put_cache(sources, "sources", overwrite=True) + _, _, sources = self.available_coords_sources except: # No internet or authentication error try: sources = self.get_cache("sources") @@ -678,7 +693,9 @@ def sources_default(self): "Connection or Authentication error, and no disk cache to fall back on for determining sources." ) - b = self.source + "/" + else: + self.put_cache(sources, "sources", overwrite=True) + time_crds = self.source_coordinates["time"] if time_crds.is_monotonic and time_crds.is_uniform and time_crds.size > 1: tol = time_crds.coordinates[1] - time_crds.coordinates[0] @@ -687,21 +704,13 @@ def sources_default(self): tol = tol - tol tol = np.timedelta64(1, dtype=(tol.dtype)) - src_objs = [ - SMAPSource( - source=b + s, - auth_session=self.auth_session, - layerkey=self.layerkey, - interpolation={"method": "nearest", "time_tolerance": tol}, - ) - for s in sources - ] - return np.array(src_objs) + kwargs = {"layer_key": self.layer_key, "interpolation": {"method": "nearest", "time_tolerance": tol}} + return [SMAPSource(source="%s/%s" % (self.folder_url, s), **kwargs) for s in sources] - @tl.default("is_source_coordinates_complete") - def src_crds_complete_default(self): - """Flag use to optimize creation of native_coordinates. If the source_coordinates are complete, - native_coordinates can easily be reconstructed, and same with shared coordinates. + @property + def is_source_coordinates_complete(self): + """Flag use to optimize creation of coordinates. If the source_coordinates are complete, + coordinates can easily be reconstructed, and same with shared coordinates. 
Returns ------- @@ -710,11 +719,12 @@ def src_crds_complete_default(self): """ return self.product not in SMAP_INCOMPLETE_SOURCE_COORDINATES - def get_source_coordinates(self): - """{source_coordinates} - """ + @cached_property + def source_coordinates(self): + """{source_coordinates}""" + try: - times, latlon, _ = self.get_available_coords_sources() + times, latlon, _ = self.available_coords_sources except: try: return self.get_cache("source.coordinates") @@ -722,30 +732,17 @@ def get_source_coordinates(self): raise NodeException( "Connection or Authentication error, and no disk cache to fall back on for determining sources." ) - - if latlon is not None and latlon.size > 0: - crds = podpac.Coordinates([[times, latlon[:, 0], latlon[:, 1]]], dims=["time_lat_lon"]) else: - crds = podpac.Coordinates([times], dims=["time"]) - self.put_cache(crds, "source.coordinates", overwrite=True) - return crds - - @cache_func("shared.coordinates") - def get_shared_coordinates(self): - """Coordinates that are shared by all files in the folder. - - Returns - ------- - podpac.Coordinates - Coordinates shared by all files in the folder - """ - if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: - return None - - coords = copy.deepcopy(self.sources[0].native_coordinates) - return coords.drop("time") + if latlon is not None and latlon.size > 0: + crds = Coordinates([[times, latlon[:, 0], latlon[:, 1]]], dims=["time_lat_lon"]) + else: + crds = Coordinates([times], dims=["time"]) + self.put_cache(crds, "source.coordinates", overwrite=True) + return crds - def get_available_coords_sources(self): + # TODO just return the elements, then return each actual thing separately + @cached_property + def available_coords_sources(self): """Read NSIDC site for available coordinate sources Returns @@ -762,12 +759,11 @@ def get_available_coords_sources(self): RuntimeError If the NSIDC website cannot be accessed """ - url = self.source - r = _get_from_url(url, self.auth_session) + r = _get_from_url(self.folder_url, self.session) if r is None: - _logger.warning("Could not contact {} to retrieve source coordinates".format(url)) + _logger.warning("Could not contact {} to retrieve source coordinates".format(self.folder_url)) return np.array([]), None, np.array([]) - soup = bs4.BeautifulSoup(r.text, "lxml") + soup = BeautifulSoup(r.text, "lxml") a = soup.find_all("a") file_regex = self.file_url_re file_regex2 = self.file_url_re2 @@ -820,124 +816,114 @@ def keys(self): """ return self.sources[0].keys - @property - def base_definition(self): - """ Definition for SMAP node. Sources not required as these are computed. - """ - d = super(podpac.compositor.Compositor, self).base_definition - d["interpolation"] = self.interpolation - return d - @common_doc(COMMON_DOC) -class SMAP(podpac.compositor.OrderedCompositor): +class SMAP(SMAPSessionMixin, DiskCacheMixin, SMAPCompositor): """Compositor of all the SMAPDateFolder's for every available SMAP date. Essentially a compositor of all SMAP data for a particular product. Attributes ---------- - auth_class : {auth_class} - auth_session : {auth_session} base_url : {base_url} date_url_re : SRE_Pattern Regular expression used to extract all folder dates (or folder names) for the particular SMAP product. 
- layerkey : {layerkey} - password : {password} + layer_key : {layer_key} product : str {product} - username : {username} """ - # Need to overwrite parent because of recursive definition - outputs = None base_url = tl.Unicode().tag(attr=True) + product = tl.Enum(SMAP_PRODUCT_MAP.coords["product"].data.tolist(), default_value="SPL4SMAU").tag(attr=True) + version = tl.Int(allow_none=True).tag(attr=True) + layer_key = tl.Unicode().tag(attr=True) + + date_url_re = re.compile(r"[0-9]{4}\.[0-9]{2}\.[0-9]{2}") - @tl.validate("source_coordinates") - def _validate_source_coordinates(self, d): - # Need to overwrite parent because of recursive definition - return d["value"] + _repr_keys = ["product"] @tl.default("base_url") def _base_url_default(self): return SMAP_BASE_URL() - product = tl.Enum(SMAP_PRODUCT_MAP.coords["product"].data.tolist(), default_value="SPL4SMAU").tag(attr=True) - version = tl.Int(allow_none=True).tag(attr=True) - @tl.default("version") def _detect_product_version(self): - return _infer_SMAP_product_version(self.product, self.base_url, self.auth_session) + return _infer_SMAP_product_version(self.product, self.base_url, self.session) - date_url_re = re.compile(r"[0-9]{4}\.[0-9]{2}\.[0-9]{2}") - - auth_session = tl.Instance(authentication.EarthDataSession) - auth_class = tl.Type(authentication.EarthDataSession) - username = tl.Unicode(None, allow_none=True) - password = tl.Unicode(None, allow_none=True) - - @tl.default("auth_session") - def _auth_session_default(self): - return self.auth_class(username=self.username, password=self.password, product_url=SMAP_BASE_URL()) + @tl.default("layer_key") + def _layerkey_default(self): + return SMAP_PRODUCT_MAP.sel(product=self.product, attr="layer_key").item() - layerkey = tl.Unicode() + @tl.default("shared_coordinates") + def _default_shared_coordinates(self): + """Coordinates that are shared by all files in the SMAP product family. - @tl.default("layerkey") - def _layerkey_default(self): - return SMAP_PRODUCT_MAP.sel(product=self.product, attr="layerkey").item() + Notes + ------ + For example, the gridded SMAP data have the same lat-lon coordinates in every file (global at some resolution), + and the only difference between files is the time coordinate. 
+ This is not true for the SMAP-Sentinel product, in which case this function returns None + """ + if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: + return None - @tl.observe("layerkey") - def _layerkey_change(self, change): - if change["old"] != change["new"] and change["old"] != "": - for s in self.sources: - s.layerkey = change["new"] + sample_source = SMAPDateFolder(product=self.product, version=self.version, folder_date=self.available_dates[0]) + return sample_source.shared_coordinates - def __repr__(self): - rep = "{}".format("SMAP") - rep += "\n\tproduct: {}".format(self.product) - rep += "\n\tinterpolation: {}".format(self.interpolation) + @cached_property + def available_dates(self): + """ Available dates in SMAP date format, sorted.""" + url = "/".join([self.base_url, "%s.%03d" % (self.product, self.version)]) + r = _get_from_url(url, self.session) + if r is None: + _logger.warning("Could not contact {} to retrieve source coordinates".format(url)) + return [] + soup = BeautifulSoup(r.text, "lxml") + matches = [self.date_url_re.match(a.get_text()) for a in soup.find_all("a")] + dates = [m.group() for m in matches if m] + return dates + + @cached_property + def sources(self): + """Array of SMAPDateFolder objects pointing to specific SMAP folders""" + + kwargs = { + "product": self.product, + "version": self.version, + "layer_key": self.layer_key, + "shared_coordinates": self.shared_coordinates, # this is an optimization + } + return [SMAPDateFolder(folder_date=date, **kwargs) for date in self.available_dates] - return rep + @common_doc(COMMON_DOC) + @cached_property + def source_coordinates(self): + """{source_coordinates} + """ + available_times = [np.datetime64(date.replace(".", "-")) for date in self.available_dates] + return Coordinates([available_times], dims=["time"]) @property - def source(self): - """The source is used for a unique name to cache SMAP products. + def base_ref(self): + """Summary Returns ------- - str - The SMAP product name. + TYPE + Description """ - return "%s.%03d" % (self.product, self.version) - - @tl.default("sources") - def sources_default(self): - """SMAPDateFolder objects pointing to specific SMAP folders + return "{0}_{1}".format(self.__class__.__name__, self.product) - Returns - ------- - np.ndarray(dtype=object(SMAPDateFolder)) - Array of SMAPDateFolder instances tied to specific SMAP folders + @property + @common_doc(COMMON_DOC) + def keys(self): + """{keys} """ - dates = self.get_available_times_dates()[1] - src_objs = np.array( - [ - SMAPDateFolder( - product=self.product, - version=self.version, - folder_date=date, - shared_coordinates=self.shared_coordinates, - auth_session=self.auth_session, - layerkey=self.layerkey, - ) - for date in dates - ] - ) - return src_objs + return self.sources[0].keys @common_doc(COMMON_DOC) def find_coordinates(self): """ - {native_coordinates} + {coordinates} Notes ----- @@ -946,78 +932,11 @@ def find_coordinates(self): if self.product in SMAP_IRREGULAR_COORDINATES: raise Exception("Native coordinates too large. 
Try using get_filename_coordinates_sources().") - shared = self.get_shared_coordinates() - partial_sources = self.get_source_coordinates()["time"].coordinates - complete_source_0 = self.sources[0].get_source_coordinates()["time"].coordinates + partial_sources = self.source_coordinates["time"].coordinates + complete_source_0 = self.sources[0].source_coordinates["time"].coordinates offset = complete_source_0 - partial_sources[0] full_times = (partial_sources[:, None] + offset[None, :]).ravel() - return [merge_dims([podpac.Coordinates([full_times], ["time"]), shared])] - - @common_doc(COMMON_DOC) - def get_source_coordinates(self): - """{source_coordinates} - """ - return podpac.Coordinates([self.get_available_times_dates()[0]], dims=["time"]) - - def get_available_times_dates(self): - """Returns the available folder dates in the SMAP product - - Returns - ------- - np.ndarray - Array of dates in numpy datetime64 format - list - list of dates in SMAP date format - - Raises - ------ - RuntimeError - If the http resource could not be accessed (check Earthdata login credentials) - """ - url = "/".join([self.base_url, "%s.%03d" % (self.product, self.version)]) - r = _get_from_url(url, self.auth_session) - if r is None: - _logger.warning("Could not contact {} to retrieve source coordinates".format(url)) - return np.array([]), [] - soup = bs4.BeautifulSoup(r.text, "lxml") - a = soup.find_all("a") - regex = self.date_url_re - times = [] - dates = [] - for aa in a: - m = regex.match(aa.get_text()) - if m: - times.append(np.datetime64(m.group().replace(".", "-"))) - dates.append(m.group()) - times.sort() - dates.sort() - return np.array(times), dates - - @cache_func("shared.coordinates") - def get_shared_coordinates(self): - """Coordinates that are shared by all files in the SMAP product family. - - Returns - ------- - podpac.Coordinates - Coordinates shared by all files in the SMAP product. - - Notes - ------ - For example, the gridded SMAP data have the same lat-lon coordinates in every file (global at some resolution), - and the only difference between files is the time coordinate. - This is not true for the SMAP-Sentinel product, in which case this function returns None - """ - if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: - return None - - coords = SMAPDateFolder( - product=self.product, - version=self.version, - folder_date=self.get_available_times_dates()[1][0], - auth_session=self.auth_session, - ).shared_coordinates - return coords + return [podpac.coordinates.merge_dims([Coordinates([full_times], ["time"]), self.shared_coordinates])] def get_filename_coordinates_sources(self, bounds=None, update_cache=False): """Returns coordinates solely based on the filenames of the sources. This function was motivated by the @@ -1025,18 +944,18 @@ def get_filename_coordinates_sources(self, bounds=None, update_cache=False): Parameters ----------- - bounds: podpac.Coordinates, Optional + bounds: :class:`podpac.Coordinates`, Optional Default is None. Return the coordinates based on filenames of the source only within the specified bounds. When not None, the result is not cached. update_cache: bool, optional Default is False. The results of this call are automatically cached to disk. This function will try to - update the cache if new data arrives. Only set this flag to True to rebuild the entire index locally (which + update the cache if new data arrives. Only set this flag to True to rebuild_auth the entire index locally (which may be needed when version numbers in the filenames change). 
Returns ------- - podpac.Coordinates + :class:`podpac.Coordinates` Coordinates of all the sources in the product family Container Container that will generate an array of the SMAPSources pointing to unique OpenDAP urls corresponding to @@ -1083,14 +1002,11 @@ def latlonmap(x): # Restrict the query to any specified bounds if bounds: - kwargs["temporal"] = ",".join([str(b.astype("datetime64[s]")) for b in bounds["time"].area_bounds]) + kwargs["temporal"] = ",".join([str(b.astype("datetime64[s]")) for b in bounds["time"].bounds]) # Get CMR data filenames = nasaCMR.search_granule_json( - auth_session=self.auth_session, - entry_map=lambda x: x["producer_granule_id"], - short_name=self.product, - **kwargs, + session=self.session, entry_map=lambda x: x["producer_granule_id"], short_name=self.product, **kwargs ) if not filenames: return Coordinates([]), [], [] @@ -1112,7 +1028,7 @@ def latlonmap(x): return crds, filenames, dates # Create kwargs for making a SMAP source - create_kwargs = {"auth_session": self.auth_session, "layer_key": self.layerkey} + create_kwargs = {"layer_key": self.layer_key} if self.interpolation: create_kwargs["interpolation"] = self.interpolation @@ -1124,7 +1040,7 @@ def latlonmap(x): # Specify the bounds based on the last entry in the cached coordinates # Add a minute to the bounds to make sure we get unique coordinates kwargs = { - "temporal": str(crds["time"].area_bounds[-1].astype("datetime64[s]") + np.timedelta64(5, "m")) + "/" + "temporal": str(crds["time"].bounds[-1].astype("datetime64[s]") + np.timedelta64(5, "m")) + "/" } crds_new, filenames_new, dates_new = cmr_query(kwargs) @@ -1161,64 +1077,23 @@ def latlonmap(x): self.put_cache(crds, "filename.coordinates", overwrite=update_cache) self.put_cache(sources, "filename.sources", overwrite=update_cache) - # Update the auth_session and/or interpolation and/or other keyword arguments in the sources class + # Updates interpolation and/or other keyword arguments in the sources class sources.create_kwargs = create_kwargs return crds, sources - @property - def base_ref(self): - """Summary - - Returns - ------- - TYPE - Description - """ - return "{0}_{1}".format(self.__class__.__name__, self.product) - - @property - def base_definition(self): - """ Definition for SMAP node. Sources not required as these are computed. - """ - d = super(podpac.compositor.Compositor, self).base_definition - d["interpolation"] = self.interpolation - return d - - @property - @common_doc(COMMON_DOC) - def keys(self): - """{keys} - """ - return self.sources[0].keys - -class SMAPBestAvailable(podpac.compositor.OrderedCompositor): +class SMAPBestAvailable(OrderedCompositor): """Compositor of SMAP-Sentinel and the Level 4 SMAP Analysis Update soil moisture """ - @tl.default("sources") - def sources_default(self): - """Orders the compositor of SPL2SMAP_S in front of SPL4SMAU - - Returns - ------- - np.ndarray(dtype=object(SMAP)) - Array of SMAP product sources - """ - src_objs = np.array( - [ - SMAP(interpolation=self.interpolation, product="SPL2SMAP_S"), - SMAP(interpolation=self.interpolation, product="SPL4SMAU"), - ] - ) - return src_objs - - def __repr__(self): - rep = "{}".format("SMAP (Best Available)") - return rep + @cached_property + def sources(self): + """Orders the compositor of SPL2SMAP_S in front of SPL4SMAU. 
""" - def get_shared_coordinates(self): - return None # NO shared coordiantes + return [ + SMAP(interpolation=self.interpolation, product="SPL2SMAP_S"), + SMAP(interpolation=self.interpolation, product="SPL4SMAU"), + ] class GetSMAPSources(object): @@ -1241,13 +1116,9 @@ def __getitem__(self, slc): source_urls = [base_url + np2smap_date(d)[:10] + "/" + f for d, f in zip(self.dates[slc], self.filenames[slc])] return np.array([SMAPSource(source=s, **self.create_kwargs) for s in source_urls], object)[return_slice] - @property + @cached_property def base_url(self): - if not self._base_url: - self._base_url = SMAPDateFolder( - product=self.product, folder_date="00001122", auth_session=self.create_kwargs["auth_session"] - ).source[:-8] - return self._base_url + return SMAPDateFolder(product=self.product, folder_date="00001122").folder_url[:-8] def __len__(self): return len(self.filenames) @@ -1259,3 +1130,67 @@ def intersect(self, I): dates=[self.dates[i] for i in I], create_kwargs=self.create_kwargs, ) + + +if __name__ == "__main__": + import getpass + from matplotlib import pyplot + import podpac + + logging.basicConfig() + + product = "SPL4SMAU" + interpolation = {"method": "nearest", "params": {"time_tolerance": np.timedelta64(2, "h")}} + + sm = SMAP(product=product, interpolation=interpolation) + + # username = input("Username: ") + # password = getpass.getpass("Password: ") + # sm.set_credentials(username=username, password=password) + + # SMAP info + print(sm) + print("SMAP Definition:", sm.json_pretty) + print( + "SMAP available_dates:", + "%s - %s (%d)" % (sm.available_dates[0], sm.available_dates[1], len(sm.available_dates)), + ) + print("SMAP source_coordinates:", sm.source_coordinates) + print("SMAP shared_coordinates:", sm.shared_coordinates) + print("Sources:", sm.sources[:3], "... (%d)" % len(sm.sources)) + + # sample SMAPDateFolder info + sm_datefolder = sm.sources[0] + print("Sample DateFolder:", sm_datefolder) + print("Sample DateFolder Definition:", sm_datefolder.json_pretty) + print("Sample DateFolder source_coordinates:", sm_datefolder.source_coordinates) + print("Sample DateFolder Sources:", sm_datefolder.sources[:3], "... 
(%d)" % len(sm_datefolder.sources)) + + # sample SMAPSource info + sm_source = sm_datefolder.sources[0] + print("Sample DAP Source:", sm_source) + print("Sample DAP Source Definition:", sm_source.json_pretty) + print("Sample DAP Native Coordinates:", sm_source.coordinates) + + print("Another Sample DAP Native Coordinates:", sm_datefolder.sources[1].coordinates) + + # eval whole world + c_world = Coordinates( + [podpac.crange(90, -90, -2.0), podpac.crange(-180, 180, 2.0), "2018-05-19T12:00:00"], + dims=["lat", "lon", "time"], + ) + o = sm.eval(c_world) + o.plot(cmap="gist_earth_r") + pyplot.axis("scaled") + + # eval points over time + lat = [45.0, 45.0, 0.0, 45.0] + lon = [-100.0, 20.0, 20.0, 100.0] + c_pts = Coordinates([[lat, lon], podpac.crange("2018-05-15T00", "2018-05-19T00", "3,h")], dims=["lat_lon", "time"]) + + o = sm.eval(c_pts) + # sm.threaded = False + pyplot.plot(ot.time, ot.data.T) + + pyplot.show() + print("Done") diff --git a/podpac/datalib/smap_egi.py b/podpac/datalib/smap_egi.py index 4cdb0259e..82b698727 100644 --- a/podpac/datalib/smap_egi.py +++ b/podpac/datalib/smap_egi.py @@ -33,15 +33,12 @@ def isnat(a): np.isnat = isnat # Internal dependencies -import podpac -import podpac.datalib -from podpac.core.coordinates import Coordinates +from podpac import Coordinates, UnitsDataArray, cached_property from podpac.datalib import EGI -from podpac.core.units import UnitsDataArray SMAP_PRODUCT_DICT = { - #'shortname': ['lat_key', 'lon_key', 'data_key', 'quality_flag', 'default_verison'] - "SPL4SMAU": ["/x", "/y", "/Analysis_Data/sm_surface_analysis", None, 4], + #'shortname': ['lat_key', 'lon_key', '_data_key', 'quality_flag', 'default_verison'] + "SPL4SMAU": ["/x", "/y", "/Analysis_Data/sm_surface_analysis", None, None], "SPL4SMGP": ["/x", "/y", "/Geophysical_Data/sm_surface", None, 4], "SPL4SMLM": ["/x", "/y", "/Land_Model_Constants_Data", None, 4], "SPL3SMAP": [ @@ -49,44 +46,45 @@ def isnat(a): "/Soil_Moisture_Retrieval_Data/longitude", "/Soil_Moisture_Retrieval_Data/soil_moisture", "/Soil_Moisture_Retrieval_Data/retrieval_qual_flag", - 3, + "003", ], "SPL3SMA": [ "/Soil_Moisture_Retrieval_Data/latitude", "/Soil_Moisture_Retrieval_Data/longitude", "/Soil_Moisture_Retrieval_Data/soil_moisture", "/Soil_Moisture_Retrieval_Data/retrieval_qual_flag", - 3, + "003", ], "SPL3SMP_AM": [ "/Soil_Moisture_Retrieval_Data_AM/latitude", "/Soil_Moisture_Retrieval_Data_AM/longitude", "/Soil_Moisture_Retrieval_Data_AM/soil_moisture", "/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag", - 5, + "005", ], "SPL3SMP_PM": [ "/Soil_Moisture_Retrieval_Data_PM/latitude", "/Soil_Moisture_Retrieval_Data_PM/longitude", "/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm", "/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm", - 5, + "005", ], "SPL3SMP_E_AM": [ "/Soil_Moisture_Retrieval_Data_AM/latitude", "/Soil_Moisture_Retrieval_Data_AM/longitude", "/Soil_Moisture_Retrieval_Data_AM/soil_moisture", "/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag", - 3, + "003", ], "SPL3SMP_E_PM": [ "/Soil_Moisture_Retrieval_Data_PM/latitude_pm", "/Soil_Moisture_Retrieval_Data_PM/longitude_pm", "/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm", "/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm", - 3, + "003", ], } + SMAP_PRODUCTS = list(SMAP_PRODUCT_DICT.keys()) @@ -111,41 +109,49 @@ class SMAP(EGI): min_bounds_span = tl.Dict(default_value={"lon": 0.3, "lat": 0.3, "time": "3,h"}).tag(attr=True) check_quality_flags = tl.Bool(True).tag(attr=True) quality_flag_key = 
tl.Unicode(allow_none=True).tag(attr=True) + data_key = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - # set default short_name, data_key, lat_key, lon_key, version - @tl.default("short_name") - def _short_name_default(self): + @property + def short_name(self): if "SPL3SMP" in self.product: return self.product.replace("_AM", "").replace("_PM", "") else: return self.product - @tl.default("lat_key") - def _lat_key_default(self): - return SMAP_PRODUCT_DICT[self.product][0] + # pull _data_key, lat_key, lon_key, and version from product dict + @cached_property + def _product_data(self): + return SMAP_PRODUCT_DICT[self.product] - @tl.default("lon_key") - def _lon_key_default(self): - return SMAP_PRODUCT_DICT[self.product][1] + @property + def lat_key(self): + return self._product_data[0] - @tl.default("quality_flag_key") - def _quality_flag_key_default(self): - return SMAP_PRODUCT_DICT[self.product][3] + @property + def lon_key(self): + return self._product_data[1] - @tl.default("data_key") - def _data_key_default(self): - return SMAP_PRODUCT_DICT[self.product][2] + @property + def _data_key(self): + if self.data_key is None: + return self._product_data[2] + else: + return self.data_key + + @property + def quality_flag_key(self): + return self._product_data[3] + + @property + def version(self): + return self._product_data[4] @property def coverage(self): if self.quality_flag_key: - return (self.data_key, self.quality_flag_key, self.lat_key, self.lon_key) + return (self._data_key, self.quality_flag_key, self.lat_key, self.lon_key) else: - return (self.data_key, self.lat_key, self.lon_key) - - @tl.default("version") - def _version_default(self): - return SMAP_PRODUCT_DICT[self.product][4] + return (self._data_key, self.lat_key, self.lon_key) def read_file(self, filelike): """Interpret individual SMAP file from EGI zip archive. 
@@ -163,10 +169,10 @@ def read_file(self, filelike): ------ ValueError """ - ds = h5py.File(filelike) + ds = h5py.File(filelike, "r") # handle data - data = ds[self.data_key][()] + data = ds[self._data_key][()] if self.check_quality_flags and self.quality_flag_key: flag = ds[self.quality_flag_key][()] @@ -236,8 +242,8 @@ def append_file(self, all_data, data): NotImplementedError """ if all_data.shape[1:] == data.shape[1:]: - data.lat.data = all_data.lat.data - data.lon.data = all_data.lon.data + data.lat.data[:] = all_data.lat.data + data.lon.data[:] = all_data.lon.data else: # select only data with finite coordinates data = data.isel(lon=np.isfinite(data.lon), lat=np.isfinite(data.lat)) @@ -256,7 +262,29 @@ def append_file(self, all_data, data): lon.data[Ilon] = data.lon[Ilon] # Assign to data - data.lon.data = lon.data - data.lat.data = lat.data + data.lon.data[:] = lon.data + data.lat.data[:] = lat.data return all_data.combine_first(data) + + +if __name__ == "__main__": + import logging + import getpass + from podpac import Coordinates, clinspace + + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) + + username = input("Username:") + password = getpass.getpass("Password:") + + # level 3 access + c = Coordinates( + [clinspace(-82, -81, 10), clinspace(38, 39, 10), clinspace("2015-07-06", "2015-07-08", 10)], + dims=["lon", "lat", "time"], + ) + + node = SMAP(product="SPL3SMP_AM", username=username, password=password) + output = node.eval(c) + print(output) diff --git a/podpac/datalib/terraintiles.py b/podpac/datalib/terraintiles.py index c759554a9..3b3a72628 100644 --- a/podpac/datalib/terraintiles.py +++ b/podpac/datalib/terraintiles.py @@ -44,13 +44,7 @@ from podpac.data import Rasterio from podpac.compositor import OrderedCompositor from podpac.interpolators import Rasterio as RasterioInterpolator, ScipyGrid, ScipyPoint -from podpac.data import interpolation_trait - -from lazy_import import lazy_module - -# optional imports -s3fs = lazy_module("s3fs") -rasterio = lazy_module("rasterio") +from podpac.data import InterpolationTrait #### # private module attributes @@ -58,7 +52,6 @@ # create log for module _logger = logging.getLogger(__name__) -_s3 = s3fs.S3FileSystem(anon=True) class TerrainTilesSource(Rasterio): @@ -75,15 +68,13 @@ class TerrainTilesSource(Rasterio): rasterio dataset """ - outputs = None - # parameters source = tl.Unicode().tag(readonly=True) # attributes - interpolation = interpolation_trait( + interpolation = InterpolationTrait( default_value={"method": "nearest", "interpolators": [RasterioInterpolator, ScipyGrid, ScipyPoint]} - ).tag(readonly=True) + ).tag(attr=True) @tl.default("crs") def _default_crs(self): @@ -94,45 +85,6 @@ def _default_crs(self): if "normal" in self.source: return "EPSG:3857" - @tl.default("dataset") - def open_dataset(self): - """Opens the data source""" - - cache_key = "fileobj" - with rasterio.MemoryFile() as f: - - # load data from cache - if self.cache_ctrl and self.has_cache(key=cache_key): - _logger.debug("Retrieving terrain tile {} from cache'".format(self.source)) - data = self.get_cache(key=cache_key) - f.write(data) - - else: - - # try finding local file first - try: - with open(self.source, "rb") as localfile: - data = localfile.read() - - # download and put in cache - except FileNotFoundError: - _logger.info("Downloading S3 fileobj: {}".format(self.source)) - with _s3.open(self.source, "rb") as s3file: - data = s3file.read() - - # write to memory file - f.write(data) - - # put data in the cache - _logger.debug("Caching 
terrain tile {} in key 'fileobj'".format(self.source)) - self.cache_ctrl # confirm this is initialized - self.put_cache(data, key=cache_key) - - f.seek(0) - dataset = f.open() - - return dataset - def get_data(self, coordinates, coordinates_index): data = super(TerrainTilesSource, self).get_data(coordinates, coordinates_index) data.data[data.data < 0] = np.nan @@ -152,7 +104,7 @@ def download(self, path="terraintiles"): """ filename = os.path.split(self.source)[1] # get filename off of source - joined_path = os.path.join(path, os.path.split(self.source)[0]) # path to file + joined_path = os.path.join(path, os.path.split(self.source)[0].replace("s3://", "")) # path to file filepath = os.path.abspath(os.path.join(joined_path, filename)) # make the directory if it hasn't been made already @@ -161,7 +113,7 @@ def download(self, path="terraintiles"): # download the file _logger.debug("Downloading terrain tile {} to filepath: {}".format(self.source, filepath)) - _s3.get(self.source, filepath) + self.s3.get(self.source, filepath) class TerrainTiles(OrderedCompositor): @@ -195,27 +147,11 @@ class TerrainTiles(OrderedCompositor): Defaults to 'elevation-tiles-prod' """ - outputs = None - # parameters zoom = tl.Int(default_value=6).tag(attr=True) tile_format = tl.Enum(["geotiff", "terrarium", "normal"], default_value="geotiff").tag(attr=True) bucket = tl.Unicode(default_value="elevation-tiles-prod").tag(attr=True) - - @tl.default("sources") - def _default_sources(self): - return np.array([]) - - @property - def source(self): - """ - S3 Bucket source of TerrainTiles - - Returns - ------- - str - """ - return self.bucket + sources = None # these are loaded as needed def select_sources(self, coordinates): # get all the tile sources for the requested zoom level and coordinates @@ -223,7 +159,6 @@ def select_sources(self, coordinates): # create TerrainTilesSource classes for each url source self.sources = np.array([self._create_source(source) for source in sources]) - return self.sources def download(self, path="terraintiles"): @@ -244,7 +179,7 @@ def download(self, path="terraintiles"): raise ValueError("No terrain tile sources selected. 
Evaluate node at coordinates to select sources.") def _create_source(self, source): - return TerrainTilesSource(source="{}/{}".format(self.bucket, source)) + return TerrainTilesSource(source="s3://{}/{}".format(self.bucket, source), cache_ctrl=self.cache_ctrl) ############ @@ -480,3 +415,26 @@ def _mercator_to_tilespace(xm, ym, zoom): y = int(tiles * (np.pi - ym) / diameter) return x, y + + +if __name__ == "__main__": + from podpac import Coordinates, clinspace + + c = Coordinates([clinspace(40, 43, 1000), clinspace(-76, -72, 1000)], dims=["lat", "lon"]) + + print("TerrainTiles") + node = TerrainTiles(tile_format="geotiff", zoom=8) + output = node.eval(c) + print(output) + + print("TerrainTiles cached") + node = TerrainTiles(tile_format="geotiff", zoom=8, cache_ctrl=["ram", "disk"]) + output = node.eval(c) + print(output) + + # tile urls + print("get tile urls") + print(np.array(get_tile_urls("geotiff", 1))) + print(np.array(get_tile_urls("geotiff", 9, coordinates=c))) + + print("done") diff --git a/podpac/datalib/test/test_smap.py b/podpac/datalib/test/test_smap.py new file mode 100644 index 000000000..64c3bbba3 --- /dev/null +++ b/podpac/datalib/test/test_smap.py @@ -0,0 +1,80 @@ +import pytest + +import requests +import traitlets as tl + +import podpac.datalib +from podpac.datalib import smap + +from podpac import settings + +# dummy class mixing in custom Earthdata requests Session +class SomeSmapNode(smap.SMAPSessionMixin): + pass + + +class TestSMAPSessionMixin(object): + url = "urs.earthdata.nasa.gov" + + def test_hostname(self): + node = SomeSmapNode() + assert node.hostname == self.url + + def test_auth_required(self): + # make sure auth is deleted from setttings, if it was already there + + # auth required + with settings: + if "username@urs.earthdata.nasa.gov" in settings: + del settings["username@urs.earthdata.nasa.gov"] + + if "password@urs.earthdata.nasa.gov" in settings: + del settings["password@urs.earthdata.nasa.gov"] + + node = SomeSmapNode() + + # throw auth error + with pytest.raises(ValueError, match="username"): + node.session + + node.set_credentials(username="testuser", password="testpass") + + assert node.session + assert node.session.auth == ("testuser", "testpass") + assert isinstance(node.session, requests.Session) + + def test_set_credentials(self): + with settings: + node = SomeSmapNode() + node.set_credentials(username="testuser", password="testpass") + + assert settings["username@{}".format(self.url)] == "testuser" + assert settings["password@{}".format(self.url)] == "testpass" + + def test_session(self): + with settings: + + node = SomeSmapNode() + node.set_credentials(username="testuser", password="testpass") + + assert node.session + assert node.session.auth == ("testuser", "testpass") + assert isinstance(node.session, requests.Session) + + def test_earth_data_session_rebuild_auth(self): + class Dum(object): + pass + + with settings: + node = SomeSmapNode() + node.set_credentials(username="testuser", password="testpass") + + prepared_request = Dum() + prepared_request.headers = {"Authorization": 0} + prepared_request.url = "https://example.com" + + response = Dum() + response.request = Dum() + response.request.url = "https://example2.com" + + node.session.rebuild_auth(prepared_request, response) diff --git a/podpac/datalib/weathercitizen.py b/podpac/datalib/weathercitizen.py new file mode 100644 index 000000000..52d231be6 --- /dev/null +++ b/podpac/datalib/weathercitizen.py @@ -0,0 +1,685 @@ +""" +Weather Citizen + +Crowd sourced environmental 
observations from mobile devices (https://weathercitizen.org) + +- Documentation: https://weathercitizen.org/docs +- API: https://api.weathercitizen.org + +Requires + +- requests: `pip install requests` +- pandas: `pip install pandas` + +Optionally: + +- read_protobuf: `pip install read-protobuf` - decodes sensor burst media files +""" + +import json +from datetime import datetime +from datetime import timedelta +import logging +from copy import deepcopy + +import traitlets as tl +import pandas as pd +import numpy as np +import requests + +from podpac.data import DataSource +from podpac.core.data.datasource import COMMON_DATA_DOC +from podpac.core.utils import common_doc, trait_is_defined +from podpac.core.coordinates import Coordinates, UniformCoordinates1d, ArrayCoordinates1d, StackedCoordinates + + +URL = "https://api.weathercitizen.org/" +DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" # always UTC (ISO 8601 / RFC 3339 format) + +# create log for module +_logger = logging.getLogger(__name__) + + +class WeatherCitizen(DataSource): + """DataSource to handle WeatherCitizen data + + Attributes + ---------- + source : str + Collection (database) to pull data from. + Defaults to "geosensors" which is the primary data collection + data_key : str, int + Data key of interest, default "properties.pressure" + uuid : str, list(str), optional + String or list of strings to filter data by uuid + device : str, list(str), ObjectId, list(ObjectId), optional + String or list of strings to filter data by device object id + version : string, list(str), optional + String or list of strings to filter data by WeatherCitizen version + query : dict, optional + Arbitrary pymongo query to apply to data. + Note that certain fields in this query may be overridden if other keyword arguments are specified + verbose : bool, optional + Display log messages or progress + """ + + source = tl.Unicode(allow_none=True, default_value="geosensors") + data_key = tl.Unicode(allow_none=True, default_value="properties.pressure").tag(attr=True) + uuid = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + device = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + version = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + query = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) + verbose = tl.Bool(allow_none=True, default_value=True).tag(attr=True) + override_limit = tl.Bool(allow_none=True, default_value=False).tag(attr=True) + + @common_doc(COMMON_DATA_DOC) + def get_coordinates(self): + """{get_coordinates} + """ + + # TODO: how to limit data retrieval for large queries? + + # query parameters + start_time = datetime(2016, 1, 1, 1, 0, 0) # before WeatherCitizen existed + projection = {"properties.time": 1, "geometry.coordinates": 1} + + # make sure data_key exists in dataset + query = {self.data_key: {"$exists": True}} + + # handle if the user specifies a query and the data_key is already in that query + if self.query is not None and self.data_key in self.query: + query = deepcopy(self.query) + query[self.data_key]["$exists"] = True + + # check the length of the matched items + length = get( + collection=self.source, + start_time=start_time, + uuid=self.uuid, + device=self.device, + version=self.version, + query=query, + projection=projection, + verbose=self.verbose, + return_length=True, + ) + + # add some kind of stop on querying above a certain length?
+ if length > 10000 and not self.override_limit: + raise ValueError( + "More than {} data points match this WeatherCitizen query. Please reduce the scope of your query.".format( + length + ) + ) + + items = get( + collection=self.source, + start_time=start_time, + uuid=self.uuid, + device=self.device, + version=self.version, + query=query, + projection=projection, + verbose=self.verbose, + ) + + lat = [item["geometry"]["coordinates"][1] for item in items] + lon = [item["geometry"]["coordinates"][0] for item in items] + time = [item["properties"]["time"] for item in items] + + return Coordinates([[lat, lon, time]], dims=["lat_lon_time"]) + + @common_doc(COMMON_DATA_DOC) + def get_data(self, coordinates, coordinates_index): + """{get_data} + """ + + # TODO: how to limit data retrieval for large queries? + + # default coordinate bounds for queries + time_bounds = [datetime(2016, 1, 1, 1, 0, 0), None] # before WeatherCitizen existed + lat_bounds = [-90, 90] + lon_bounds = [-180, 180] + + # override bounds + if "time" in coordinates.udims: + time_bounds = coordinates["time"].bounds + if "lat" in coordinates.udims: + lat_bounds = coordinates["lat"].bounds + if "lon" in coordinates.udims: + lon_bounds = coordinates["lon"].bounds + + box = [[lon_bounds[0], lat_bounds[0]], [lon_bounds[1], lat_bounds[1]]] + + # make sure data_key exists in dataset + query = {self.data_key: {"$exists": True}} + + # handle if the user specifies a query and the data_key is already in that query + if self.query is not None and self.data_key in self.query: + query = deepcopy(self.query) + query[self.data_key]["$exists"] = True + + # only project data key + projection = {self.data_key: 1} + + # check the length of the matched items + length = get( + collection=self.source, + start_time=time_bounds[0], + end_time=time_bounds[1], + box=box, + uuid=self.uuid, + device=self.device, + version=self.version, + query=query, + projection=projection, + verbose=self.verbose, + return_length=True, + ) + + # add some kind of stop on querying above a certain length? + if length > 10000 and not self.override_limit: + raise ValueError( + "More than {} data points match this WeatherCitizen query. Please reduce the scope of your query.".format( + length + ) + ) + + items = get( + collection=self.source, + start_time=time_bounds[0], + end_time=time_bounds[1], + box=box, + uuid=self.uuid, + device=self.device, + version=self.version, + query=query, + projection=projection, + verbose=self.verbose, + ) + + data = np.array([item[self.data_key] for item in items]) + + return self.create_output_array(coordinates, data=data) + + +############## +# Standalone functions +############## +def get( + collection="geosensors", + start_time=None, + end_time=None, + box=None, + near=None, + uuid=None, + device=None, + version=None, + query=None, + projection=None, + verbose=False, + dry_run=False, + return_length=False, +): + """Get documents from the server for devices in a timerange + + Parameters + ---------- + collection : str, list(str) + Collection(s) to query + start_time : str, datetime, optional + String or datetime for start of timerange (>=). + Defaults to 1 hour ago. + This input must be compatible with pandas `pd.to_datetime(start_time, utc=True)` + Input assumes UTC by default, but will recognize timezone string EDT, UTC, etc. For example "2019-09-01 08:00 EDT" + end_time : str, datetime, optional + Same as `start_time` but specifies end of time range (<). + Defaults to now.
+ box : list(list(float)), optional + Geo bounding box described as 2-d array of bottom-left and top-right corners. + If specified, `near` will be ignored. + Contents: [[ , (bottom left coordinates) ], [ , (upper right coordinates) ]] + For example: [[-83, 36], [-81, 34]] + near : tuple([float, float], int), optional + Geo bounding box described as 2-d near with a center point and a radius (km) from center point. + This input will be ignored if box is defined. + Contents: ([, ], ) + For example: ([-72.544655, 40.932559], 16000) + uuid : str, list(str), options + String or list of strings to filter data by uuid + device : str, list(str), ObjectId, list(ObjectId), optional + String or list of strings to filter data by device object id + version : string, list(str), optional + String or list of strings to filter data to filter data by WeatherCitizen version + query : dict, optional + Arbitrary pymongo query to apply to data. + Note that certain fields in this query may be overriden if other keyword arguments are specified + projection: dict, optional + Specify what fields should or should not be returned. + Dict keys are field names. + Dict values should be set to 1 to include field (and exclude all others) or set to 0 to exclude field and include all others + verbose : bool, optional + Display log messages or progress + dry_run : bool, optional + Return urls of queries instead of the actual query. + Returns a list of str with urls for each collections. + Defaults to False. + return_length : bool, optional + Return length of the documents that match the query + + Returns + ------- + list + List of items from server matching query. + If `dry_run` is True, returns a list or url strings for query. + """ + + # always make collection a list + if isinstance(collection, str): + collection = [collection] + + # get query string for each collection in list + query_strs = [ + _build_query( + collection=coll, + start_time=start_time, + end_time=end_time, + box=box, + near=near, + uuid=uuid, + device=device, + version=version, + query=query, + projection=projection, + ) + for coll in collection + ] + + # dry run + if dry_run: + return query_strs + + if verbose: + print("Querying WeatherCitizen API") + + # only return the length of the matched documents + if return_length: + length = 0 + for query_str in query_strs: + length += _get(query_str, verbose=verbose, return_length=return_length) + + if verbose: + print("Returned {} records".format(length)) + + return length + + # start query at page 0 with no items + # iterate through collections aggregating items + items = [] + for query_str in query_strs: + items += _get(query_str, verbose=verbose) + + if verbose: + print("\r") + print("Downloaded {} records".format(len(items))) + + return items + + +def get_record(collection, obj_id, url=URL): + """Get a single record from a collection by obj_id + + Parameters + ---------- + collection : str + Collection name + obj_id : str + Object id + """ + + # check url + if url[-1] != "/": + url = "{}/".format(url) + + # query the server + r = requests.get(url + collection + "/" + obj_id) + + if r.status_code != 200: + raise ValueError("Failed to query the server with status {}.\n\nResponse:\n {}".format(r.status_code, r.text)) + + return r.json() + + +def get_file(media, save=False, output_path=None): + """Get media file + + Parameters + ---------- + media : str, dict + Media record or media record object id in the media or geomedia collections. 
+ save : bool, optional + Save to file + output_path : None, optional + If save is True, output the file to different file path + + Returns + ------- + bytes + If output_path is None, returns raw file content as bytes + + Raises + ------ + ValueError + Description + """ + + if isinstance(media, str): + media_id = media + elif isinstance(media, dict): + media_id = media["_id"] + + try: + record = get_record("media", media_id) + except ValueError: + try: + record = get_record("geomedia", media_id) + + except ValueError: + raise ValueError("Media id {} not found in the database".format(media_id)) + + # get file + r = requests.get(record["file"]["url"]) + + if r.status_code != 200: + raise ValueError( + "Failed to download binary data with status code {}.\n\nResponse:\n {}".format(r.status_code, r.text) + ) + + # save to file if output_path is not None + if save: + if output_path is None: + output_path = record["properties"]["filename"] + with open(output_path, "wb") as f: + f.write(r.content) + else: + return r.content + + +def read_sensorburst(media): + """Download and read sensorburst records. + + Requires: + - read-protobuf: `pip install read-protobuf` + - sensorburst_pb2: Download from https://api.weathercitizen.org/static/sensorburst_pb2.py + - Once downloaded, put this file in the directory as your analysis + + Parameters + ---------- + media : str, dict, list of str, list of dict + Media record(s) or media record object id(s) in the media or geomedia collections. + + Returns + ------- + pd.DataFrame + Returns pandas dataframe of records + """ + + try: + from read_protobuf import read_protobuf + except ImportError: + raise ImportError( + "Reading sensorburst requires `read_protobuf` module. Install using `pip install read-protobuf`." + ) + + # import sensorburst definition + try: + from podpac.datalib import weathercitizen_sensorburst_pb2 as sensorburst_pb2 + except ImportError: + try: + import sensorburst_pb2 + except ImportError: + raise ImportError( + "Processing WeatherCitizen protobuf requires `sensorburst_pb2.py` in the current working directory. Download from https://api.weathercitizen.org/static/sensorburst_pb2.py." 
+ ) + + if isinstance(media, (str, dict)): + media = [media] + + # get pb content + pbs = [get_file(m) for m in media] + + # initialize protobuf object + Burst = sensorburst_pb2.Burst() + + # get the first dataframe + df = read_protobuf(pbs[0], Burst) + + # append later dataframes + if len(pbs) > 1: + for pb in pbs[1:]: + df = df.append(read_protobuf(pb, Burst), sort=False) + + return df + + +def to_dataframe(items): + """Create normalized dataframe from records + + Parameters + ---------- + items : list of dict + Record items returned from `get()` + """ + df = pd.json_normalize(items) + + # Convert geometry.coordinates to lat and lon + df["lat"] = df["geometry.coordinates"].apply(lambda coord: coord[1] if coord and coord is not np.nan else None) + df["lon"] = df["geometry.coordinates"].apply(lambda coord: coord[0] if coord and coord is not np.nan else None) + df = df.drop(["geometry.coordinates"], axis=1) + + # break up all the arrays so the data is easier to use + arrays = [ + "properties.accelerometer", + "properties.gravity", + "properties.gyroscope", + "properties.linear_acceleration", + "properties.magnetic_field", + "properties.orientation", + "properties.rotation_vector", + ] + + for col in arrays: + df[col + "_0"] = df[col].apply(lambda val: val[0] if val and val is not np.nan else None) + df[col + "_1"] = df[col].apply(lambda val: val[1] if val and val is not np.nan else None) + df[col + "_2"] = df[col].apply(lambda val: val[2] if val and val is not np.nan else None) + + df = df.drop([col], axis=1) + + return df + + +def to_csv(items, filename="weathercitizen-data.csv"): + """Convert items to CSV output + + Parameters + ---------- + items : list of dict + Record items returned from `get()` + """ + + df = to_dataframe(items) + + df.to_csv(filename) + + +def update_progress(current, total): + """ + Parameters + ---------- + current : int, float + current number + total : int, floar + total number + """ + + if total == 0: + return + + progress = float(current / total) + bar_length = 20 + block = int(round(bar_length * progress)) + text = "Progress: |{0}| [{1} / {2}]".format("#" * block + " " * (bar_length - block), current, total) + + print("\r", text, end="") + + +def _build_query( + collection="geosensors", + start_time=None, + end_time=None, + box=None, + near=None, + uuid=None, + device=None, + version=None, + query=None, + projection=None, +): + """Build a query string for a single collection. 
+ See :func:`get` for type definitions of each input + + Returns + ------- + string + query string + """ + + if query is None: + query = {} + + # filter by time + # default to 1 hour ago + one_hour_ago = (datetime.utcnow() - timedelta(hours=1)).strftime(DATE_FORMAT) + if start_time is not None: + start_time = pd.to_datetime(start_time, utc=True, infer_datetime_format=True).strftime(DATE_FORMAT) + query["properties.time"] = {"$gte": start_time} + else: + query["properties.time"] = {"$gte": one_hour_ago} + + # default to now + if end_time is not None: + end_time = pd.to_datetime(end_time, utc=True, infer_datetime_format=True).strftime(DATE_FORMAT) + query["properties.time"]["$lte"] = end_time + + # geo bounding box + if box is not None: + if len(box) != 2: + raise ValueError("box parameter must be a list of length 2") + + query["geometry"] = {"$geoWithin": {"$box": box}} + + # geo bounding circle + if near is not None: + if len(near) != 2 or not isinstance(near, tuple): + raise ValueError("near parameter must be a tuple of length 2") + + query["geometry"] = {"$near": {"$geometry": {"type": "Point", "coordinates": near[0]}, "$maxDistance": near[1]}} + + # specify uuid + if uuid is not None: + if isinstance(uuid, str): + query["properties.uuid"] = uuid + elif isinstance(uuid, list): + query["properties.uuid"] = {"$in": uuid} + + # specify device + if device is not None: + if isinstance(device, str): + query["properties.device"] = device + elif isinstance(device, list): + query["properties.device"] = {"$in": device} + + # specify version + if version is not None: + if isinstance(version, str): + query["version"] = version + elif isinstance(version, list): + query["version"] = {"$in": version} + + # add collection to query string and handle projection + if projection is not None: + query_str = "{}?where={}&projection={}".format(collection, json.dumps(query), json.dumps(projection)) + else: + query_str = "{}?where={}".format(collection, json.dumps(query)) + + return query_str + + +def _get(query, items=None, url=URL, verbose=False, return_length=False): + """Internal method to query API. + See `get` for interface. + + Parameters + ---------- + query : dict, str + query dict or string + if dict, it will be converted into a string with json.dumps() + items : list, optional + aggregated items as this method is recursively called. Defaults to []. + url : str, optional + API url. Defaults to module URL. 
+ verbose : bool, optional + Display log messages or progress + return_length : bool, optional + Return length of the documents that match the query + + Returns + ------- + list + + Raises + ------ + ValueError + Description + """ + + # if items are none, set to [] + if items is None: + items = [] + + # check url + if url[-1] != "/": + url = "{}/".format(url) + + # query the server + r = requests.get(url + query) + + if r.status_code != 200: + raise ValueError("Failed to query the server with status {}.\n\nResponse:\n {}".format(r.status_code, r.text)) + + # get json out of response + resp = r.json() + + # return length only if requested + if return_length: + return resp["_meta"]["total"] + + # return documents + if len(resp["_items"]): + + # show progress + if verbose: + current_page = resp["_meta"]["page"] + total_pages = round(resp["_meta"]["total"] / resp["_meta"]["max_results"]) + update_progress(current_page, total_pages) + + # append items + items += resp["_items"] + + # get next set, if in links + if "_links" in resp and "next" in resp["_links"]: + return _get(resp["_links"]["next"]["href"], items=items) + else: + return items + else: + return items diff --git a/podpac/datalib/weathercitizen_sensorburst_pb2.py b/podpac/datalib/weathercitizen_sensorburst_pb2.py new file mode 100644 index 000000000..94d4f09fa --- /dev/null +++ b/podpac/datalib/weathercitizen_sensorburst_pb2.py @@ -0,0 +1,585 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: sensorburst.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="sensorburst.proto", + package="sensorburst", + syntax="proto3", + serialized_options=_b("H\003"), + serialized_pb=_b( + '\n\x11sensorburst.proto\x12\x0bsensorburst"\xbf\x04\n\x06Record\x12\x0c\n\x04time\x18\x01 \x01(\x03\x12\x0c\n\x04long\x18\x02 \x01(\x02\x12\x0b\n\x03lat\x18\x03 \x01(\x02\x12\x10\n\x08\x61ltitude\x18\x04 \x01(\x02\x12\x13\n\x0btemperature\x18\x05 \x01(\x02\x12\x10\n\x08pressure\x18\x06 \x01(\x02\x12\r\n\x05light\x18\x07 \x01(\x02\x12\x11\n\tproximity\x18\x08 \x01(\x05\x12\x17\n\x0f\x61\x63\x63\x65lerometer_x\x18\t \x01(\x02\x12\x17\n\x0f\x61\x63\x63\x65lerometer_y\x18\n \x01(\x02\x12\x17\n\x0f\x61\x63\x63\x65lerometer_z\x18\x0b \x01(\x02\x12\x1d\n\x15linear_acceleration_x\x18\x0c \x01(\x02\x12\x1d\n\x15linear_acceleration_y\x18\r \x01(\x02\x12\x1d\n\x15linear_acceleration_z\x18\x0e \x01(\x02\x12\x15\n\rorientation_x\x18\x0f \x01(\x02\x12\x15\n\rorientation_y\x18\x10 \x01(\x02\x12\x15\n\rorientation_z\x18\x11 \x01(\x02\x12\x18\n\x10magnetic_field_x\x18\x12 \x01(\x02\x12\x18\n\x10magnetic_field_y\x18\x13 \x01(\x02\x12\x18\n\x10magnetic_field_z\x18\x14 \x01(\x02\x12\x13\n\x0bgyroscope_x\x18\x15 \x01(\x02\x12\x13\n\x0bgyroscope_y\x18\x16 \x01(\x02\x12\x13\n\x0bgyroscope_z\x18\x17 \x01(\x02\x12\x11\n\tgravity_x\x18\x18 \x01(\x02\x12\x11\n\tgravity_y\x18\x19 \x01(\x02\x12\x11\n\tgravity_z\x18\x1a \x01(\x02"-\n\x05\x42urst\x12$\n\x07records\x18\x01 \x03(\x0b\x32\x13.sensorburst.RecordB\x02H\x03\x62\x06proto3' + ), +) + + +_RECORD = _descriptor.Descriptor( + name="Record", + full_name="sensorburst.Record", + filename=None, + 
file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="time", + full_name="sensorburst.Record.time", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="long", + full_name="sensorburst.Record.long", + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="lat", + full_name="sensorburst.Record.lat", + index=2, + number=3, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="altitude", + full_name="sensorburst.Record.altitude", + index=3, + number=4, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="temperature", + full_name="sensorburst.Record.temperature", + index=4, + number=5, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="pressure", + full_name="sensorburst.Record.pressure", + index=5, + number=6, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="light", + full_name="sensorburst.Record.light", + index=6, + number=7, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="proximity", + full_name="sensorburst.Record.proximity", + index=7, + number=8, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="accelerometer_x", + full_name="sensorburst.Record.accelerometer_x", + index=8, + number=9, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="accelerometer_y", + full_name="sensorburst.Record.accelerometer_y", + index=9, + number=10, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + 
enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="accelerometer_z", + full_name="sensorburst.Record.accelerometer_z", + index=10, + number=11, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="linear_acceleration_x", + full_name="sensorburst.Record.linear_acceleration_x", + index=11, + number=12, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="linear_acceleration_y", + full_name="sensorburst.Record.linear_acceleration_y", + index=12, + number=13, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="linear_acceleration_z", + full_name="sensorburst.Record.linear_acceleration_z", + index=13, + number=14, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="orientation_x", + full_name="sensorburst.Record.orientation_x", + index=14, + number=15, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="orientation_y", + full_name="sensorburst.Record.orientation_y", + index=15, + number=16, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="orientation_z", + full_name="sensorburst.Record.orientation_z", + index=16, + number=17, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="magnetic_field_x", + full_name="sensorburst.Record.magnetic_field_x", + index=17, + number=18, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="magnetic_field_y", + full_name="sensorburst.Record.magnetic_field_y", + index=18, + number=19, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + 
serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="magnetic_field_z", + full_name="sensorburst.Record.magnetic_field_z", + index=19, + number=20, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gyroscope_x", + full_name="sensorburst.Record.gyroscope_x", + index=20, + number=21, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gyroscope_y", + full_name="sensorburst.Record.gyroscope_y", + index=21, + number=22, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gyroscope_z", + full_name="sensorburst.Record.gyroscope_z", + index=22, + number=23, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gravity_x", + full_name="sensorburst.Record.gravity_x", + index=23, + number=24, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gravity_y", + full_name="sensorburst.Record.gravity_y", + index=24, + number=25, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="gravity_z", + full_name="sensorburst.Record.gravity_z", + index=25, + number=26, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=35, + serialized_end=610, +) + + +_BURST = _descriptor.Descriptor( + name="Burst", + full_name="sensorburst.Burst", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="records", + full_name="sensorburst.Burst.records", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + 
serialized_start=612, + serialized_end=657, +) + +_BURST.fields_by_name["records"].message_type = _RECORD +DESCRIPTOR.message_types_by_name["Record"] = _RECORD +DESCRIPTOR.message_types_by_name["Burst"] = _BURST +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Record = _reflection.GeneratedProtocolMessageType( + "Record", + (_message.Message,), + { + "DESCRIPTOR": _RECORD, + "__module__": "sensorburst_pb2" + # @@protoc_insertion_point(class_scope:sensorburst.Record) + }, +) +_sym_db.RegisterMessage(Record) + +Burst = _reflection.GeneratedProtocolMessageType( + "Burst", + (_message.Message,), + { + "DESCRIPTOR": _BURST, + "__module__": "sensorburst_pb2" + # @@protoc_insertion_point(class_scope:sensorburst.Burst) + }, +) +_sym_db.RegisterMessage(Burst) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/podpac/interpolators.py b/podpac/interpolators.py index 24d4ac20d..27029e4af 100644 --- a/podpac/interpolators.py +++ b/podpac/interpolators.py @@ -5,5 +5,5 @@ # REMINDER: update api docs (doc/source/user/api.rst) to reflect changes to this file -from podpac.core.data.interpolator import Interpolator -from podpac.core.data.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyGrid, ScipyPoint +from podpac.core.interpolation.interpolator import Interpolator +from podpac.core.interpolation.interpolators import NearestNeighbor, NearestPreview, Rasterio, ScipyGrid, ScipyPoint diff --git a/podpac/managers.py b/podpac/managers.py index de07bc807..68da6c2f0 100644 --- a/podpac/managers.py +++ b/podpac/managers.py @@ -6,3 +6,5 @@ from podpac.core.managers import aws from podpac.core.managers.aws import Lambda +from podpac.core.managers.parallel import Parallel, ParallelOutputZarr +from podpac.core.managers.multi_process import Process diff --git a/podpac/pipeline.py b/podpac/pipeline.py deleted file mode 100644 index 3488ec1ed..000000000 --- a/podpac/pipeline.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Pipeline Public Module -""" - -# REMINDER: update api docs (doc/source/user/api.rst) to reflect changes to this file - -from podpac.core.pipeline import Pipeline, PipelineError -from podpac.core.pipeline.output import Output, NoOutput, FileOutput, FTPOutput, S3Output, ImageOutput diff --git a/podpac/style.py b/podpac/style.py new file mode 100644 index 000000000..f45dc25bd --- /dev/null +++ b/podpac/style.py @@ -0,0 +1,8 @@ +""" +Style Public Module +""" + +# REMINDER: update api docs (doc/source/api.rst) to reflect changes to this file + + +from podpac.core.style import Style diff --git a/podpac/utils.py b/podpac/utils.py index 84a47ee86..2367cdd54 100644 --- a/podpac/utils.py +++ b/podpac/utils.py @@ -2,8 +2,9 @@ Utils Public Module """ -# REMINDER: update api docs (doc/source/user/api.rst) to reflect changes to this file +# REMINDER: update api docs (doc/source/api.rst) to reflect changes to this file -from podpac.core.utils import create_logfile +from podpac.core.utils import create_logfile, cached_property, NodeTrait from podpac.core.cache import clear_cache +from podpac.core.node import NoCacheMixin, DiskCacheMixin diff --git a/podpac/version.py b/podpac/version.py index d33881831..077bf215d 100644 --- a/podpac/version.py +++ b/podpac/version.py @@ -15,9 +15,9 @@ ############## ## UPDATE VERSION HERE ############## -MAJOR = 1 -MINOR = 3 -HOTFIX = 1 +MAJOR = 2 +MINOR = 0 +HOTFIX = 0 ############## diff --git a/pyproject.toml b/pyproject.toml index 6815edb68..55ec8d784 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,2 @@ [tool.black] 
line-length = 120 -target-version = ['py38', 'py36', 'py37'] \ No newline at end of file diff --git a/setup.py b/setup.py index 73fae2cfb..35d15d3b7 100644 --- a/setup.py +++ b/setup.py @@ -23,13 +23,19 @@ "traitlets>=4.3", "xarray>=0.10", "requests>=2.18", - "pyproj>=2.4", "lazy-import>=0.2.2", "psutil", ] if sys.version_info.major == 2: - install_requires += ["future>=0.16"] + install_requires += [ + "future>=0.16", + "pyproj>=2.2" + ] +else: + install_requires += [ + "pyproj>=2.4" + ] extras_require = { "datatype": [ @@ -68,9 +74,6 @@ "pytest-html>=1.7.0", "pytest-remotedata>=0.3.1", "recommonmark>=0.6", - "sphinx>=2.3, <3.0", - "sphinx-rtd-theme>=0.4", - "sphinx-autobuild>=0.7", "coveralls>=1.3", "six>=1.0", "attrs>=17.4.0", @@ -79,9 +82,16 @@ } if sys.version_info.major == 2: - extras_require["dev"] += ["pytest>=3.3.2"] + extras_require["dev"] += [ + "pytest>=3.3.2" + ] else: - extras_require["dev"] += ["pytest>=5.0"] + extras_require["dev"] += [ + "sphinx>=2.3, <3.0", + "sphinx-rtd-theme>=0.4", + "sphinx-autobuild>=0.7", + "pytest>=5.0" + ] if sys.version >= '3.6': extras_require["dev"] += [