This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Tooling upgrade (#52)
* Add flake8
* Add wily
* Add isort
* Fix documentation issues to automatically link to external docs
* Fix travis dependency issues
* Manually patch flake8 requirements
adithyabsk committed Apr 26, 2019
1 parent 6a5aa87 commit 0abb2bc
Showing 53 changed files with 1,788 additions and 848 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -112,3 +112,5 @@ pip-wheel-metadata/

# Other
.DS_Store
*.sublime-project
*.sublime-workspace
2 changes: 2 additions & 0 deletions .isort.cfg
@@ -0,0 +1,2 @@
[settings]
known_third_party = category_encoders,numpy,pandas,pytest,scipy,sklearn,tpot
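
For reference, the import grouping that this isort configuration enforces looks roughly like the following hypothetical module (shown only to illustrate the section order: standard library, then the known_third_party packages, then first-party code):

# Standard-library imports come first.
import json
import os

# Then the packages listed in known_third_party.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Then first-party imports such as foreshadow itself.
import foreshadow as fs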
23 changes: 22 additions & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,27 @@
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.7.0
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.15
hooks:
- id: isort
additional_dependencies: ['toml']
- repo: https://github.com/ambv/black
rev: stable
hooks:
- id: black
language_version: python3.6
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.2.3
hooks:
- id: flake8
- repo: local
hooks:
- id: wily
name: wily
entry: printf %s "y\ny\n" | wily diff
verbose: true
language: python
additional_dependencies: [wily]
2 changes: 0 additions & 2 deletions .travis.yml
@@ -18,7 +18,6 @@ before_install:
- pip install poetry
install:
- poetry install -v -E dev
- poetry run pip install tox tox-travis coveralls
script:
- poetry run tox
after_success:
@@ -29,7 +28,6 @@ env:

jobs:
include:
- python: "3.5"
- python: "3.6"

- stage: linting
2 changes: 1 addition & 1 deletion README.rst
@@ -90,7 +90,7 @@ Key Features
- Ease of Extensibility
- Scikit-Learn Compatible

Foreshadow supports python 3.5+
Foreshadow supports python 3.6+

Documentation
-------------
13 changes: 10 additions & 3 deletions doc/conf.py
@@ -15,7 +15,6 @@
import os
import sys

import toml

sys.path.append(os.path.join(os.path.dirname(__name__), ".."))
sys.path.append(os.path.join(os.path.dirname(__name__), "../.."))
@@ -64,6 +63,7 @@ def get_version():

# Autodoc Settings
autodoc_default_flags = ["members", "undoc-members"]
autodoc_member_order = "bysource"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -163,7 +163,9 @@ def get_version():

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "Foreshadow", u"Foreshadow Documentation", [author], 1)]
man_pages = [
(master_doc, "Foreshadow", u"Foreshadow Documentation", [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------
@@ -189,7 +191,12 @@ def get_version():
# -- Options for intersphinx extension ---------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {"https://docs.python.org/": None}
intersphinx_mapping = {
"python": ("http://docs.python.org/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
"numpy": ("https://docs.scipy.org/doc/numpy", None),
"sklearn": ("https://scikit-learn.org/stable/", None),
}

# -- Options for todo extension ----------------------------------------------

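With the expanded mapping above, cross-references to pandas, numpy, and scikit-learn objects in docstrings resolve to the external documentation when the docs are built. A minimal sketch of the kind of docstring that benefits (the summarize function is hypothetical and only illustrates the cross-reference roles):

import pandas as pd


def summarize(df):
    """Return the mean and standard deviation of each numeric column.

    With the intersphinx mapping in doc/conf.py, references such as
    :class:`pandas.DataFrame` or :meth:`pandas.DataFrame.describe` in a
    docstring like this one are rendered as links to the external pandas
    documentation instead of plain text.

    Args:
        df (:class:`pandas.DataFrame`): Input data.

    Returns:
        :class:`pandas.DataFrame`: One row per statistic (mean, std).
    """
    return df.describe().loc[["mean", "std"]]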
3 changes: 1 addition & 2 deletions doc/developers.rst
@@ -43,11 +43,10 @@ General Setup

.. code-block:: console
$ pyenv install 3.5.5
$ pyenv install 3.6.5
$ pyenv global 3.6.5
$ pyenv virtualenv -p python3.6 3.6.5 venv
$ pyenv local venv 3.6.5 3.5.5
$ pyenv local venv 3.6.5
3. Install poetry package manager

2 changes: 1 addition & 1 deletion doc/index.rst
@@ -59,7 +59,7 @@ Key Features
- Ease of Extensibility
- Scikit-Learn Compatible

Foreshadow supports python 3.5+
Foreshadow supports python 3.6+


The User Guide
187 changes: 115 additions & 72 deletions doc/users.rst
@@ -351,31 +351,44 @@ An example configuration for processing the Boston Housing dataset is below. We
.. code-block:: json
{
"columns":{
"crim":{"intent": "GenericIntent",
"pipeline": [
{"transformer": "StandardScaler", "name": "Scaler", "parameters": {"with_mean":false}}
]},
"indus":{"intent": "GenericIntent"}
},
"postprocess":[
{"name":"pca",
"columns": ["age"],
"pipeline": [
{"transformer": "PCA", "name": "PCA", "parameters": {"n_components":2}}
]}
],
"intents":{
"NumericIntent":{
"single":[
{"transformer": "Imputer", "name": "impute", "parameters": {"strategy":"mean"}}
],
"multi":[]
"columns": {
"crim": {
"intent": "GenericIntent",
"pipeline": [{
"transformer": "StandardScaler",
"name": "Scaler",
"parameters": {
"with_mean": false
}
}]
},
"indus": {
"intent": "GenericIntent"
}
},
"postprocess": [{
"name": "pca",
"columns": ["age"],
"pipeline": [{
"transformer": "PCA",
"name": "PCA",
"parameters": {
"n_components": 2
}
}]
}],
"intents": {
"NumericIntent": {
"single": [{
"transformer": "Imputer",
"name": "impute",
"parameters": {
"strategy": "mean"
}
}],
"multi": []
}
}
},
}
The configuration file is composed of a root dictionary containing three hard-coded keys: :code:`columns`,
@@ -384,26 +397,40 @@
Column Override
~~~~~~~~~~~~~~~

.. code-block:: json
.. code-block:: python
"columns":{
"crim":{"intent": "GenericIntent",
"pipeline": [
{"transformer": "StandardScaler", "name": "Scaler", "parameters": {"with_mean":false}}
]},
"indus":{"intent": "GenericIntent"}
}
"columns": {
"crim": {
"intent": "GenericIntent",
"pipeline": [{
"transformer": "StandardScaler",
"name": "Scaler",
"parameters": {
"with_mean": false
}
}]
},
"indus": {
"intent": "GenericIntent"
}
}
This section is a dictionary containing two keys, each of which is a column in the Boston Housing dataset. First, we will look at the value
of the :code:`"crim"` key, which is a dict.


.. code-block:: json
{"intent": "GenericIntent",
"pipeline": [
{"transformer": "StandardScaler", "name": "Scaler", "parameters": {"with_mean":false}}
]}
{
"intent": "GenericIntent",
"pipeline": [{
"transformer": "StandardScaler",
"name": "Scaler",
"parameters": {
"with_mean": false
}
}]
}
Here we can see that this column has been assigned the intent :code:`"GenericIntent"`
and the pipeline :code:`[{"transformer": "StandardScaler", "name": "Scaler", "parameters": {"with_mean":false}}]`
@@ -427,16 +454,20 @@
Intent Override
~~~~~~~~~~~~~~~

.. code-block:: json
.. code-block:: python
"intents":{
"NumericIntent":{
"single":[
{"transformer": "Imputer", "name": "impute", "parameters": {"strategy":"mean"}}
],
"multi":[]
"intents": {
"NumericIntent": {
"single": [{
"transformer": "Imputer",
"name": "impute",
"parameters": {
"strategy": "mean"
}
}],
"multi": []
}
}
}
Next, we will examine the :code:`intents` section. This section is used to override intents globally, unlike the columns section, which overrode intents on a per-column
@@ -457,12 +488,19 @@ Postprocessor Override

.. code-block:: json
{"postprocess":[
{"name":"pca","columns":["age"],"pipeline":[
{"class":"PCA", "name":"PCA", "parameters":{"n_components":2}}
]}
]}
{
"postprocess": [{
"name": "pca",
"columns": ["age"],
"pipeline": [{
"class": "PCA",
"name": "PCA",
"parameters": {
"n_components": 2
}
}]
}]
}
Finally, in the :code:`postprocess` section of the configuration, you can manually define pipelines to execute on columns of your choosing. The
content of this section is a list of dictionaries of the form :code:`[{"name":name, "columns":[cols, ...], "pipeline":pipeline}, ...]`. Each entry defines a pipeline that will
@@ -518,28 +556,33 @@ This is what a combinations section looks like.

.. code-block:: json
{
"columns":{
"crim":{"intent": "GenericIntent",
"pipeline": [
{"transformer": "StandardScaler", "name": "Scaler", "parameters": {"with_mean":false}}
]},
"indus":{"intent": "GenericIntent"}
},
"postprocess":[],
"intents":{},
"combinations": [
{
"columns.crim.pipeline.0.parameters.with_mean": "[True, False]",
"columns.crim.pipeline.0.name": "['Scaler', 'SuperScaler']"
{
"columns": {
"crim": {
"intent": "GenericIntent",
"pipeline": [{
"transformer": "StandardScaler",
"name": "Scaler",
"parameters": {
"with_mean": false
}
}]
},
"indus": {
"intent": "GenericIntent"
}
]
}
},
"postprocess": [],
"intents": {},
"combinations": [{
"columns.crim.pipeline.0.parameters.with_mean": "[True, False]",
"columns.crim.pipeline.0.name": "['Scaler', 'SuperScaler']"
}]
}
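The dotted keys in the combinations section address single values inside the nested configuration; how foreshadow resolves them internally is not part of this diff. The standalone sketch below (a hypothetical helper, for illustration only) shows how a path such as columns.crim.pipeline.0.parameters.with_mean walks the structure:

import json


def resolve(config, dotted_path):
    """Follow a dotted path through nested dicts and lists.

    Numeric segments index into lists; all other segments are dict keys.
    """
    node = config
    for segment in dotted_path.split("."):
        if isinstance(node, list):
            node = node[int(segment)]
        else:
            node = node[segment]
    return node


config = json.loads("""
{
  "columns": {
    "crim": {
      "intent": "GenericIntent",
      "pipeline": [{
        "transformer": "StandardScaler",
        "name": "Scaler",
        "parameters": {"with_mean": false}
      }]
    }
  }
}
""")

# Prints False -- the value a combinations entry would then vary over [True, False].
print(resolve(config, "columns.crim.pipeline.0.parameters.with_mean"))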
7 changes: 5 additions & 2 deletions examples/adult_1.py
@@ -1,9 +1,12 @@
import json

import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

import foreshadow as fs
import json


RANDOM_SEED = 42
adult = pd.read_csv("adult.csv").iloc[:1000]
9 changes: 6 additions & 3 deletions examples/adult_2.py
@@ -1,9 +1,12 @@
import json

import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split

import foreshadow as fs
import json


RANDOM_SEED = 42

2 changes: 1 addition & 1 deletion examples/adult_x_proc_search.json
@@ -507,4 +507,4 @@
"columns.fnlwgt.pipeline.2.transformer": "['StandardScaler', 'MinMaxScaler']"
}
]
}
}
