more fixes to csvheader api; more documentation and data

dannguyen · Nov 10, 2020 · 681b8c6 · 681b8c6
1 parent 09b5d23
commit 681b8c6
Show file tree

Hide file tree

Showing 37 changed files with 6,351 additions and 667 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -1,4 +1,4 @@
-# http://editorconfig.org
+# https://editorconfig.org
 
 root = true
 

diff --git a/Makefile b/Makefile
@@ -49,13 +49,13 @@ clean-test: ## remove test and coverage artifacts
 
 
 test: ## run tests quickly with the default Python
-	nosetests
+	nosetests -x
 
 test-all: ## run tests on every Python version with tox
 	tox
 
 coverage: ## check code coverage quickly with the default Python
-	nosetests --with-coverage --cover-package=csvmedkit
+	nosetests -x --with-coverage --cover-package=csvmedkit
 # 	coverage run --source csvmedkit setup.py test
 # 	coverage report -m
 # 	coverage html
@@ -70,7 +70,7 @@ docs: ## generate Sphinx HTML documentation, including API docs
 	$(BROWSER) docs/_build/html/index.html
 
 servedocs: docs ## compile the docs watching for changes
-	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
+	watchmedo shell-command -p '*.rst*' -c '$(MAKE) -C docs html' -R -D .
 
 release: dist ## package and upload a release
 	twine upload dist/*

diff --git a/TODOS.md b/TODOS.md
@@ -6,7 +6,7 @@
 
 ### csvslice
 
-- [ ] rename `--intervals` to `--include`; in case we rejigger csvslice to allow resequencing of returned rows?
+- [x] rename `--intervals` to `--include`; in case we rejigger csvslice to allow resequencing of returned rows?
 - do docs; this should be style and template for others
     - [ ] write usage examples
     - [ ] write "compared to"
@@ -22,11 +22,11 @@
 
 ### csvheader 
 
-When writing csvheader docs, realized we needed a `-C` option that also replaces the existing header (in the case of piping from csvflatten). And/or, the --rename/-R convention needs to be changed.
-
-NAMING THINGS IS HARD!!! THINK ABOUT IT LATER THIS WEEK
+- [x] `-A` and `--AX` options for adding/overwriting a header by passing in a comma-delimited string of column names
+- [x] `-G` and `--GX` options for adding/overwriting a header with generic field names
+- [x] `-B` and `-C` (bash/create) have been killed for being too confusing  
 
-Current thoughts (2020-11-09):
+old thoughts (2020-11-09):
 
 - `-A/--add <column_names>` to add/append headers
 - `--AX/--add-x/--AS/--add-sub <column_names>` to add and overwrite/substitute header
@@ -35,10 +35,15 @@ Current thoughts (2020-11-09):
 
 Old thoughts:
 
+When writing csvheader docs, realized we needed a `-C` option that also replaces the existing header (in the case of piping from csvflatten). And/or, the --rename/-R convention needs to be changed.
+
+NAMING THINGS IS HARD!!! THINK ABOUT IT LATER THIS WEEK
+
+
 - Is there any reason to have `-B/--bash`? In what situation would a user want to replace existing data headers with generic headers?
     - [ ] not really, so kill it...
 - [ ] is `-R/--rename` convention particularly graceful/convenient?
-- [ ] Instead of `-C`, maybe we should have some kind of `--replace`.
+- [x] Instead of `-C`, maybe we should have some kind of `--replace`.
 - [ ] But `--replace` is confusing with `--rename`, so maybe use `-C/--change/clobber`, or `-O/--overwrite`?
 - [ ] `-X/--regex` should just be `--regex` for now?
 
@@ -223,7 +228,7 @@ Overall stuff
 
 
 csvslice:
-    - [X] implementation with simplified `-i/--intervals` option (0.0.9.11)
+    - [X] implementation with simplified `-i/--include` option (0.0.9.11)
     - [x] tests (0.0.9.11)
 
 

diff --git a/csvmedkit/moreutils/csvheader.py b/csvmedkit/moreutils/csvheader.py
@@ -28,29 +28,39 @@ class CSVHeader(CmkUtil):
     ]
 
     def add_arguments(self):
+
         self.argparser.add_argument(
             "-A",
             "--add",
             dest="add_header",
-            action="store_true",
-            help="""Add a header row of generic, numbered column names, starting from 1, e.g. field_1, field_2, and so on.""",
+            metavar="<column_names>",
+            type=str,
+            help="""Add a header row of column names using a comma-delimited string, e.g. 'ID,cost,"name, proper"'""",
+        )
+
+        self.argparser.add_argument(
+            "--AX",
+            "--add-x",
+            dest="add_x_header",
+            metavar="<column_names>",
+            type=str,
+            help="""TKTK Add a header row of column names using a comma-delimited string, e.g. 'ID,cost,"name, proper"'""",
         )
 
         self.argparser.add_argument(
-            "-B",
-            "--bash",
-            dest="bash_header",
+            "-G",
+            "--generic",
+            dest="generic_header",
             action="store_true",
-            help="""Bash (i.e. completely replace) the current header row with generic column names, e.g. field_1, field_2.""",
+            help="""Add a header row of generic, numbered column names, starting from 1, e.g. field_1, field_2, and so on.""",
         )
 
         self.argparser.add_argument(
-            "-C",
-            "--create",
-            dest="create_header",
-            metavar="<column_names>",
-            type=str,
-            help="""Similar to `--add`, but specify column names with a comma-delimited string, e.g. 'ID,cost,"name, proper"'""",
+            "--GX",
+            "--generic-x",
+            dest="generic_x_header",
+            action="store_true",
+            help="""TKTK Bash (i.e. completely replace) the current header row with generic column names, e.g. field_1, field_2.""",
         )
 
         self.argparser.add_argument(
@@ -93,22 +103,38 @@ def add_arguments(self):
         )
 
     @property
-    def add_header(self) -> bool:
-        return self.args.add_header
+    def generic_header(self) -> bool:
+        return self.args.generic_header
+
+    @property
+    def generic_x_header(self) -> bool:
+        return self.args.generic_x_header
 
     @property
-    def bash_header(self) -> bool:
-        return self.args.bash_header
+    def add_header(self) -> typeOptional[list]:
+        if self.args.add_header:
+            return cmk_parse_delimited_str(self.args.add_header, delimiter=",")
+        else:
+            return None
 
     @property
-    def create_header(self) -> typeOptional[list]:
-        if self.args.create_header:
+    def add_x_header(self) -> typeOptional[list]:
+        """todo: refactor with add_header; added_custom_header"""
+        if self.args.add_x_header:
             return cmk_parse_delimited_str(
-                self.args.create_header, delimiter=","
+                self.args.add_x_header, delimiter=","
             )  # TK: do proper delimitation
         else:
             return None
 
+    @property
+    def added_custom_header(self) -> typeOptional[list]:
+        """TODO: refactor"""
+        if self.args.add_header or self.args.add_x_header:
+            return self.add_header if self.add_header else self.add_x_header
+        else:
+            return None
+
     @property
     def preview(self) -> bool:
         return self.args.preview
@@ -147,24 +173,24 @@ def _prepare_headers(self) -> typeTuple[typeIterable]:
 
         rows = self.text_csv_reader()
 
-        if self.add_header or self.bash_header or self.create_header:
+        if self.generic_header or self.generic_x_header or self.added_custom_header:
             # sample first row to get a count of columns
             c_row = next(rows)
-            if self.create_header:
-                column_names = self.create_header
+            if self.added_custom_header:
+                column_names = self.added_custom_header
                 if len(column_names) != len(c_row):
                     raise ValueError(
-                        f"The data has {len(c_row)} columns, but {len(column_names)} column names were parsed from: `{self.args.create_header}`"
+                        f"The data has {len(c_row)} columns, but {len(column_names)} column names were parsed from: `{self.args.add_header}`"
                     )
             else:
                 # then it's generic column names
                 column_names = [
                     f"field_{i}" for i, _c in enumerate(c_row, self.column_start_index)
                 ]
 
-            # add_header and create_header assume the data had no header
+            # generic_header and add_header assume the data had no header
             # which means c_row is actually data and needs to be added back in
-            if self.add_header or self.create_header:
+            if self.generic_header or self.add_header:
                 rows = itertools.chain([c_row], rows)
 
         # all other options assume the data is "normal",
@@ -200,14 +226,15 @@ def _set_modes(self, column_names=typeList[str]) -> typeNoReturn:
         elif any(
             m
             for m in (
+                self.generic_header,
+                self.generic_x_header,
                 self.add_header,
-                self.bash_header,
-                self.create_header,
+                self.add_x_header,
                 self.rename_headers,
                 self.slugify_mode,
                 self.sed_pattern,
-                # self.generic_columnized,  # i.e. a add_header or bash_header
-                # self.args.create_header,
+                # self.generic_columnized,  # i.e. a generic_header or generic_x_header
+                # self.args.add_header,
             )
         ):
             self.output_headers_only = False
@@ -220,10 +247,15 @@ def main(self):
 
         if 1 < sum(
             1 if i else 0
-            for i in (self.add_header, self.bash_header, self.create_header)
+            for i in (
+                self.generic_header,
+                self.generic_x_header,
+                self.add_header,
+                self.add_x_header,
+            )
         ):
             self.argparser.error(
-                "The --add, --bash, and --create options are mutually exclusive; pick one and only one"
+                "The --add, --add-x, --generic, and --generic-x options are mutually exclusive; pick one and only one."
             )
 
         rows, column_names = self._prepare_headers()

diff --git a/csvmedkit/moreutils/csvslice.py b/csvmedkit/moreutils/csvslice.py
@@ -26,12 +26,12 @@ def add_arguments(self):
 
         self.argparser.add_argument(
             "-i",
-            "--intervals",
+            "--include",
             dest="slice_ranges_str",
             metavar="<intervals>",
             required=True,
             type=str,
-            help="""Comma-delimited string of intervals to slice, including individual indexes and ranges, e.g. '0' or '0-6,12', or '0-6,12-'""",
+            help="""Comma-delimited string of intervals to include. Can be individual indexes and ranges, e.g. '0' or '0-6,12', or '0-6,12-'""",
         )
 
     def calculate_slice_ranges(self) -> typeNoReturn:
@@ -48,7 +48,7 @@ def calculate_slice_ranges(self) -> typeNoReturn:
             rtxt = re.sub(r"\s+", "", txt).strip()
             if not re.match(r"^(?:\d+|\d+-|\d+-\d+)$", rtxt):
                 raise IncorrectlyFormattedString(
-                    f"Your --intervals argument, '{self.args.slice_ranges_str}', has an incorrectly formatted value: '{txt}'"
+                    f"Your --include argument, '{self.args.slice_ranges_str}', has an incorrectly formatted value: '{txt}'"
                 )
 
             if "-" not in rtxt:

diff --git a/docs/cookbook/index.rst b/docs/cookbook/index.rst
@@ -1,6 +1,6 @@
-************************************
-Cookbook of real-world CSV wrangling
-************************************
+*****************************************
+Cookbook of real-world CSV wrangling TKTK
+*****************************************
 
 A list of real-world use cases for **csvmedkit** command-line wrangling.
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -6,16 +6,13 @@ The unofficial extended family of csvkit, i.e. even more tools for command-line
 
 .. toctree::
    :maxdepth: 1
+   :glob:
    :caption: The utilities:
 
-   moreutils/csvheader
-   moreutils/csvflatten
-   moreutils/csvnorm
-   moreutils/csvpivot
-   moreutils/csvsed
-   moreutils/csvslice
+   moreutils/*/index
 
 
+   moreutils/*
 
 
 FAQ

diff --git a/docs/moreutils/csvflatten.rst b/docs/moreutils/csvflatten.rst
@@ -483,4 +483,8 @@ Note that ``-rec-id`` by default disables the end-of-record separator
 Common scenarios and use cases
 ==============================
 
-TK TK
+
+.. include:: /scenarios/tweets-csvflatten.rstinc
+
+
+