Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates #54

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions .flake8
@@ -0,0 +1,8 @@
[flake8]
exclude = docs/*
max-line-length = 88
extend-ignore = E203
ignore = E501,W504,W503
per-file-ignores =
**/__init__.py:F401
**/tests/*:D
13 changes: 6 additions & 7 deletions .github/workflows/build_docs.yml
Expand Up @@ -8,15 +8,15 @@ on:
name: Build Docs
jobs:
build_docs:
name: 3.7 build docs
name: 3.8 build docs
runs-on: ubuntu-latest
steps:
- name: Set up python 3.7
uses: actions/setup-python@v2
- name: Set up python 3.8
uses: actions/setup-python@v4
with:
python-version: 3.7
python-version: 3.8
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
Expand All @@ -28,8 +28,7 @@ jobs:
sudo apt install -y graphviz
pip config --site set global.progress_bar off
python -m pip install --upgrade pip
python -m pip install -e .
python -m pip install -r dev-requirements.txt
python -m pip install .[dev]
- name: Build docs
run: |
make -C docs/ html
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/entry_point_test.yml
Expand Up @@ -9,23 +9,23 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python_version: ["3.7", "3.8", "3.9"]
python_version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Set up python ${{ matrix.python_version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Build source distribution
run: make package_autonormalize
run: make package
- name: Install package
run: |
pip config --site set global.progress_bar off
python -m pip install --upgrade pip
python -m pip install -e unpacked_sdist/
python -m pip install unpacked_sdist/
- name: Test entry point
run: make entry-point-test
10 changes: 4 additions & 6 deletions .github/workflows/lint_check.yml
Expand Up @@ -12,23 +12,21 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python_version: ["3.9"]
python_version: ["3.11"]
steps:
- name: Set up python ${{ matrix.python_version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Install autonormalize with dev and test requirements
run: |
pip config --site set global.progress_bar off
python -m pip install --upgrade pip
python -m pip install -e .
python -m pip install -r dev-requirements.txt
python -m pip install -r test-requirements.txt
python -m pip install .[dev]
- name: Run lint test
run: make lint
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Expand Up @@ -8,7 +8,7 @@ jobs:
name: PyPI Release
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: PyPI Upload
uses: FeatureLabs/gh-action-pypi-upload@v1
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release_notes_updated.yml
Expand Up @@ -26,10 +26,10 @@ jobs:
print('::set-output name=is_dev::' + str(is_dev))
- if: ${{ steps.branch.outputs.is_dev == 'True' }}
name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- if: ${{ steps.branch.outputs.is_dev == 'True' }}
name: Check if release notes were updated
run: cat docs/source/release_notes.rst | grep ":pr:\`${{ github.event.number }}\`"
run: cat docs/source/release_notes.rst | grep ":pr:\`${{ github.event.number }}\`"
37 changes: 18 additions & 19 deletions .github/workflows/unit_tests_with_latest_deps.yml
@@ -1,51 +1,50 @@
name: Unit Tests - Latest Dependencies
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main

name: Unit Tests - Latest Dependencies
env:
PYARROW_IGNORE_TIMEZONE: 1
ALTERYX_OPEN_SRC_UPDATE_CHECKER: False
jobs:
unit_tests:
name: Unit Tests - Python ${{ matrix.python-version }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Set up python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Build source distribution
run: make package_autonormalize
run: make package
- name: Install package with test requirements
run: |
sudo python -m pip config --site set global.progress_bar off
python -m pip install --upgrade pip
sudo apt update && sudo apt install -y graphviz
python -m pip install -e unpacked_sdist/
python -m pip install -r unpacked_sdist/test-requirements.txt
- if: ${{ matrix.python_version == 3.7 }}
sudo apt update
sudo apt install -y graphviz
python -m pip install "unpacked_sdist/[test]"
- if: ${{ matrix.python-version == 3.8 }}
name: Run unit tests with code coverage
run: |
python -m pip install "$(cat dev-requirements.txt | grep codecov)"
coverage erase
cd unpacked_sdist/
coverage erase
pytest autonormalize/ -n 2 --cov=autonormalize --cov-config=../.coveragerc
env:
PYARROW_IGNORE_TIMEZONE: 1
ALTERYX_OPEN_SRC_UPDATE_CHECKER: False
- if: ${{ matrix.python_version == 3.7 }}
pytest autonormalize/ -n auto --cov=autonormalize --cov-config=../pyproject.toml --cov-report=xml:../coverage.xml
- if: ${{ matrix.python-version == 3.8 }}
name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
files: ${{ github.workspace }}/coverage.xml
verbose: true
1 change: 1 addition & 0 deletions .gitignore
@@ -1,4 +1,5 @@
.DS_Store
unpacked_sdist/

# IDE
.vscode
Expand Down
37 changes: 37 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,37 @@
exclude: ^LICENSE/|\.(html|csv|svg|md|txt|json)$
default_stages: [commit]
repos:
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
hooks:
- id: nbstripout
entry: nbstripout
language: python
types: [jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
files: ^autonormalize/
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma
name: Add trailing comma
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
args: [--settings-path=./pyproject.toml]
- repo: https://github.com/python/black
rev: 22.6.0
hooks:
- id: black
args: [--target-version=py311]
additional_dependencies: [".[jupyter]"]
types_or: [python, jupyter]
48 changes: 31 additions & 17 deletions Makefile
Expand Up @@ -4,37 +4,51 @@ clean:
find . -name '*.pyc' -delete
find . -name __pycache__ -delete
find . -name '*~' -delete
find . -name '.coverage.*' -delete

.PHONY: entry-point-test
entry-point-test:
cd ~ && python -c "from featuretools import autonormalize"
cd ~ && python -c "from autonormalize import autonormalize"

.PHONY: lint
lint:
flake8 autonormalize && isort --check-only autonormalize
isort --check-only autonormalize
black autonormalize docs/source -t py311 --check
flake8 autonormalize

.PHONY: lint-fix
lint-fix:
autopep8 --in-place --recursive --max-line-length=100 --exclude="*/migrations/*" --select="E225,E303,E302,E203,E128,E231,E251,E271,E127,E126,E301,W291,W293,E226,E306,E221,E261,E111,E114" autonormalize
black autonormalize docs/source -t py311
isort autonormalize

.PHONY: test
test: lint
pytest autonormalize/
test:
pytest autonormalize/ -n auto

.PHONY: testcoverage
testcoverage: lint
pytest autonormalize/ --cov=autonormalize
testcoverage:
pytest autonormalize/ -n auto --cov=autonormalize

.PHONY: installdeps
installdeps:
installdeps: upgradepip
pip install --upgrade pip
pip install -e .
pip install -r dev-requirements.txt

.PHONY: package_autonormalize
package_autonormalize:
python setup.py sdist
$(eval DT_VERSION=$(shell python setup.py --version))
tar -zxvf "dist/autonormalize-${DT_VERSION}.tar.gz"
mv "autonormalize-${DT_VERSION}" unpacked_sdist
pip install -e ".[dev]"

.PHONY: upgradepip
upgradepip:
python -m pip install --upgrade pip

.PHONY: upgradebuild
upgradebuild:
python -m pip install --upgrade build

.PHONY: upgradesetuptools
upgradesetuptools:
python -m pip install --upgrade setuptools

.PHONY: package
package: upgradepip upgradebuild upgradesetuptools
python -m build
$(eval PACKAGE=$(shell python -c "from pep517.meta import load; metadata = load('.'); print(metadata.version)"))
tar -zxvf "dist/autonormalize-${PACKAGE}.tar.gz"
mv "autonormalize-${PACKAGE}" unpacked_sdist
2 changes: 1 addition & 1 deletion autonormalize/.gitignore
Expand Up @@ -2,4 +2,4 @@
*.csv
*.pyc

__pycache__/
__pycache__/
7 changes: 3 additions & 4 deletions autonormalize/__init__.py
@@ -1,5 +1,4 @@
# flake8: noqa
from .autonormalize import *
from .classes import Dependencies

__version__ = '2.0.1'
from autonormalize.version import __version__
from autonormalize.autonormalize import *
from autonormalize.classes import Dependencies
24 changes: 16 additions & 8 deletions autonormalize/autonormalize.py
@@ -1,7 +1,7 @@
import featuretools as ft

from . import dfd, normalize
from .classes import Dependencies
from autonormalize import dfd, normalize
from autonormalize.classes import Dependencies


def find_dependencies(df, accuracy=0.98, index=None):
Expand Down Expand Up @@ -92,7 +92,7 @@ def make_entityset(df, dependencies, name=None, time_index=None):

while stack != []:
current = stack.pop()
if (current.df.ww.schema is None):
if current.df.ww.schema is None:
current.df.ww.init(index=current.index[0], name=current.index[0])

current_df_name = current.df.ww.name
Expand All @@ -101,13 +101,15 @@ def make_entityset(df, dependencies, name=None, time_index=None):
else:
dataframes[current_df_name] = (current.df, current.index[0])
for child in current.children:
if (child.df.ww.schema is None):
if child.df.ww.schema is None:
child.df.ww.init(index=child.index[0], name=child.index[0])
child_df_name = child.df.ww.name
# add to stack
# add relationship
stack.append(child)
relationships.append((child_df_name, child.index[0], current_df_name, child.index[0]))
relationships.append(
(child_df_name, child.index[0], current_df_name, child.index[0]),
)

return ft.EntitySet(name, dataframes, relationships)

Expand Down Expand Up @@ -163,10 +165,16 @@ def normalize_entityset(es, accuracy=0.98):
# to normalize while preserving existing relationships

if len(es.dataframes) > 1:
raise ValueError('There is more than one dataframe in this EntitySet')
raise ValueError("There is more than one dataframe in this EntitySet")
if len(es.dataframes) == 0:
raise ValueError('This EntitySet is empty')
raise ValueError("This EntitySet is empty")

df = es.dataframes[0]
new_es = auto_entityset(df, accuracy, index=df.ww.index, name=es.id, time_index=df.ww.time_index)
new_es = auto_entityset(
df,
accuracy,
index=df.ww.index,
name=es.id,
time_index=df.ww.time_index,
)
return new_es