Skip to content

Commit

Permalink
Improve inline comments, project metadata, and documentation
Browse files: browse the repository at this point in the history
  • Loading branch information
amotl committed Feb 26, 2023
1 parent cfdd2c9 commit aacf49c
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 8 deletions.
6 changes: 4 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ Supported input data:

Supported input sources:

- Local files
- HTTP resources
- `File system`_
- `HTTP`_


********
Expand Down Expand Up @@ -172,9 +172,11 @@ Credits and prior art
.. _ddlgenerator: https://pypi.org/project/ddlgenerator/
.. _development: doc/development.rst
.. _Evgeny Karev: https://github.com/roll
.. _file system: https://en.wikipedia.org/wiki/File_system
.. _frictionless: https://github.com/frictionlessdata/framework
.. _fsspec: https://pypi.org/project/fsspec/
.. _Google Sheets: https://en.wikipedia.org/wiki/Google_Sheets
.. _HTTP: https://en.wikipedia.org/wiki/HTTP
.. _InfluxDB line protocol: https://docs.influxdata.com/influxdb/latest/reference/syntax/line-protocol/
.. _JSON: https://www.json.org/
.. _JSON streaming: https://en.wikipedia.org/wiki/JSON_streaming
Expand Down
2 changes: 1 addition & 1 deletion eskema/fastparquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@ def read_col(column, schema_helper, infile, use_cat=False, selfmade=False, assig

num += len(defi) if defi is not None else len(val)

# PATCH for Eskema
# PATCH for Eskema: Terminate `read_col` early, in order to not load the whole file.
if num >= PEEK_LINES:
break
2 changes: 1 addition & 1 deletion eskema/frictionless/pandas_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def create_parser(self, resource):

if resource.format == "pandas":
# TODO: Submit patch to upstream.
# PATCH for Eskema to speed up inference by not loading the whole file.
# PATCH for Eskema: Speed up inference by not loading the whole file.
logger.info(f"Loading data using sample_size={resource.detector.sample_size}")
resource.data = resource.data.head(resource.detector.sample_size)
logger.info(f"Data loaded with size={len(resource.data)}") # noqa: ERA001
Expand Down
4 changes: 2 additions & 2 deletions eskema/pandas/io_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _get_filepath_or_buffer(
if "t" not in fsspec_mode and "b" not in fsspec_mode:
fsspec_mode += "b"

# PATCH for Eskema
# PATCH for Eskema: Let HTTP requests also be handled by `fsspec`.
"""
if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
# TODO: fsspec can also handle HTTP via requests, but leaving this
Expand Down Expand Up @@ -192,6 +192,6 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
return (
isinstance(url, str)
and bool(_RFC_3986_PATTERN.match(url))
# PATCH for Eskema
# PATCH for Eskema: Let HTTP requests also be handled by `fsspec`.
# and not url.startswith(("http://", "https://"))
)
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@ authors = [
]
requires-python = ">=3.6"
dependencies = [
"aiohttp<4",
"click<9",
"crash",
"crate[sqlalchemy]",
"ddlgenerator<0.2",
"frictionless[excel,json,ods,parquet,sql]<5.6",
"fsspec==2023.1",
"fsspec[http]==2023.1",
"json_stream<3",
"line-protocol-parser<2",
"odfpy<2",
Expand Down

0 comments on commit aacf49c

Please sign in to comment.