Improve inline comments, project metadata, and documentation

daq-tools · Feb 26, 2023 · aacf49c · aacf49c
1 parent cfdd2c9
commit aacf49c
Show file tree

Hide file tree

Showing 5 changed files with 9 additions and 8 deletions.
diff --git a/README.rst b/README.rst
@@ -29,8 +29,8 @@ Supported input data:
 
 Supported input sources:
 
-- Local files
-- HTTP resources
+- `File system`_
+- `HTTP`_
 
 
 ********
@@ -172,9 +172,11 @@ Credits and prior art
 .. _ddlgenerator: https://pypi.org/project/ddlgenerator/
 .. _development: doc/development.rst
 .. _Evgeny Karev: https://github.com/roll
+.. _file system: https://en.wikipedia.org/wiki/File_system
 .. _frictionless: https://github.com/frictionlessdata/framework
 .. _fsspec: https://pypi.org/project/fsspec/
 .. _Google Sheets: https://en.wikipedia.org/wiki/Google_Sheets
+.. _HTTP: https://en.wikipedia.org/wiki/HTTP
 .. _InfluxDB line protocol: https://docs.influxdata.com/influxdb/latest/reference/syntax/line-protocol/
 .. _JSON: https://www.json.org/
 .. _JSON streaming: https://en.wikipedia.org/wiki/JSON_streaming

diff --git a/eskema/fastparquet/core.py b/eskema/fastparquet/core.py
@@ -169,6 +169,6 @@ def read_col(column, schema_helper, infile, use_cat=False, selfmade=False, assig
 
         num += len(defi) if defi is not None else len(val)
 
-        # PATCH for Eskema
+        # PATCH for Eskema: Stop reading once `num` reaches `PEEK_LINES`, to avoid loading the whole file.
         if num >= PEEK_LINES:
             break
diff --git a/eskema/frictionless/pandas_plugin.py b/eskema/frictionless/pandas_plugin.py
@@ -11,7 +11,7 @@ def create_parser(self, resource):
 
     if resource.format == "pandas":
         # TODO: Submit patch to upstream.
-        # PATCH for Eskema to speed up inference by not loading the whole file.
+        # PATCH for Eskema: Speed up inference by not loading the whole file.
         logger.info(f"Loading data using sample_size={resource.detector.sample_size}")
         resource.data = resource.data.head(resource.detector.sample_size)
         logger.info(f"Data loaded with size={len(resource.data)}")  # noqa: ERA001

diff --git a/eskema/pandas/io_common.py b/eskema/pandas/io_common.py
@@ -78,7 +78,7 @@ def _get_filepath_or_buffer(
     if "t" not in fsspec_mode and "b" not in fsspec_mode:
         fsspec_mode += "b"
 
-    # PATCH for Eskema
+    # PATCH for Eskema: Let HTTP requests also be handled by `fsspec`, by disabling the URL-specific branch below.
     """
     if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
         # TODO: fsspec can also handle HTTP via requests, but leaving this
@@ -192,6 +192,6 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
     return (
         isinstance(url, str)
         and bool(_RFC_3986_PATTERN.match(url))
-        # PATCH for Eskema
+        # PATCH for Eskema: Let HTTP requests also be handled by `fsspec`, by disabling the HTTP(S) exclusion below.
         # and not url.startswith(("http://", "https://"))
     )
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,13 +29,12 @@ authors = [
 ]
 requires-python = ">=3.6"
 dependencies = [
-  "aiohttp<4",
   "click<9",
   "crash",
   "crate[sqlalchemy]",
   "ddlgenerator<0.2",
   "frictionless[excel,json,ods,parquet,sql]<5.6",
-  "fsspec==2023.1",
+  "fsspec[http]==2023.1",
   "json_stream<3",
   "line-protocol-parser<2",
   "odfpy<2",