Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Address code review and add caching demo notebook.
- Loading branch information
Mike McCarty
committed
Aug 17, 2018
1 parent
321bbb2
commit 3bbee53
Showing
2 changed files
with
318 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
# Catalog declaring two sources, each with a file-type cache entry. | ||
# NOTE(review): presumably this is the 'cache_demo.yml' opened by the demo notebook — confirm the filename. | ||
sources: | ||
# CSV source read from a public S3 URL. | ||
demographic_stats: | ||
description: Demographic Stats by zip code | ||
driver: csv | ||
cache: | ||
# NOTE(review): 'argkey' names the source argument ('urlpath') the cache entry refers to; | ||
# 'regex' looks like the portion of that path matched for caching — confirm against the Intake caching docs. | ||
- argkey: urlpath | ||
regex: 'https://s3.amazonaws.com/earth-data' | ||
type: file | ||
args: | ||
urlpath: 'https://s3.amazonaws.com/earth-data/Demographic_Statistics_By_Zip_Code.csv' | ||
# Text-file source globbing the *.yml files under the templated CATALOG_DIR. | ||
text_cache: | ||
description: textfiles in this dir | ||
driver: textfiles | ||
cache: | ||
- argkey: urlpath | ||
regex: '{{ CATALOG_DIR }}' | ||
type: file | ||
args: | ||
urlpath: "{{ CATALOG_DIR }}/*.yml" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,298 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['demographic_stats', 'text_cache']" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import logging\n", | ||
"logger = logging.getLogger('intake')\n", | ||
"logging.basicConfig()\n", | ||
"logger.setLevel(logging.DEBUG)\n", | ||
"\n", | ||
"import intake\n", | ||
"cat = intake.open_catalog('cache_demo.yml')\n", | ||
"list(cat)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'https://s3.amazonaws.com/earth-data/Demographic_Statistics_By_Zip_Code.csv'" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"stats = cat.demographic_stats()\n", | ||
"stats.cache[0].clear_all()\n", | ||
"stats._urlpath" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"DEBUG:intake:Caching file: https://s3.amazonaws.com/earth-data/Demographic_Statistics_By_Zip_Code.csv\n", | ||
"DEBUG:intake:Original path: https://s3.amazonaws.com/earth-data/Demographic_Statistics_By_Zip_Code.csv\n", | ||
"DEBUG:intake:Cached at: /Users/mmccarty/.intake/cache/f890ce4d538240e87ede9d31a6541443/Demographic_Statistics_By_Zip_Code.csv\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df = stats.read()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Second read doesn't download" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = stats.read()\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la ~/.intake/cache/f890ce4d538240e87ede9d31a6541443" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].get_metadata(stats._urlpath)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache_dirs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].clear_cache(stats._urlpath)\n", | ||
"stats.cache[0].get_metadata(stats._urlpath)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la ~/.intake/cache" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = stats.read()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la ~/.intake/cache/f890ce4d538240e87ede9d31a6541443" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Cache directory is configurable" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].clear_cache(stats._urlpath)\n", | ||
"\n", | ||
"import os.path\n", | ||
"\n", | ||
"cat = intake.open_catalog('cache_demo.yml')\n", | ||
"stats = cat.demographic_stats()\n", | ||
"stats.set_cache_dir(os.path.join(os.getcwd(), 'test_cache_dir'))\n", | ||
"stats.cache_dirs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = stats.read()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la ~/.intake/cache/f890ce4d538240e87ede9d31a6541443" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la test_cache_dir/f890ce4d538240e87ede9d31a6541443" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].get_metadata(stats._urlpath)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].clear_all()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Disable Caching" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from intake.config import conf\n", | ||
"conf['cache_disabled'] = True\n", | ||
"\n", | ||
"cat = intake.open_catalog('cache_demo.yml')\n", | ||
"stats = cat.demographic_stats()\n", | ||
"df = stats.read()\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache_dirs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%ls -la ~/.intake/cache" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"stats.cache[0].get_metadata(stats._urlpath)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |