From d13328c07464a4be86101a345e32760d0557389e Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 11 Aug 2022 14:42:10 -0800 Subject: [PATCH 1/3] Move project metadata to pyproject.toml It's encouraged for projects to define their metadata statically. This makes static analyzers and other tooling work better. --- pyproject.toml | 58 +++++++++++++++++++++++++++++++++++++++++++- setup.py | 66 +------------------------------------------------- 2 files changed, 58 insertions(+), 66 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d46b0d5e7..d718c0184 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,63 @@ +[project] +name = "dedupe" +description = "A python library for accurate and scaleable data deduplication and entity-resolution" +version = "2.0.17" +readme = "README.md" +requires-python = ">=3.7" +license = {file = "LICENSE"} +keywords = [] +authors = [ + { name = "Forest Gregg", email = "fgregg@datamade.us" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Programming Language :: Cython", + "Programming Language :: Python :: 3", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Information Analysis", +] +dependencies = [ + "scikit-learn", + "affinegap>=1.3", + "categorical-distance>=1.9", + "dedupe-variable-datetime", + "numpy>=1.13", + "doublemetaphone", + "highered>=0.2.0", + "simplecosine>=1.2", + "haversine>=0.4.1", + "BTrees>=4.1.4", + "zope.index", + "Levenshtein_search==1.4.5", + "typing_extensions", +] + +[project.urls] +Homepage = "https://github.com/dedupeio/dedupe" +Issues = "https://github.com/dedupeio/dedupe/issues" +Documentation = 
"https://docs.dedupe.io/en/latest/" +Examples = "https://github.com/dedupeio/dedupe-examples" +Twitter = "https://twitter.com/DedupeIo" +Changelog = "https://github.com/dedupeio/dedupe/blob/main/CHANGELOG.md" +MailingList = "https://groups.google.com/forum/#!forum/open-source-deduplication" + + [build-system] -requires = ["setuptools", +requires = ["setuptools>=64", "wheel", "cython"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ["dedupe", "dedupe.variables"] [tool.mypy] plugins = "numpy.typing.mypy_plugin" diff --git a/setup.py b/setup.py index 98a5b9f15..18b760eab 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,3 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - try: from setuptools import Extension, setup except ImportError: @@ -10,68 +7,7 @@ from Cython.Build import cythonize -install_requires = [ - "scikit-learn", - "affinegap>=1.3", - "categorical-distance>=1.9", - "dedupe-variable-datetime", - "numpy>=1.13", - "doublemetaphone", - "highered>=0.2.0", - "simplecosine>=1.2", - "haversine>=0.4.1", - "BTrees>=4.1.4", - "zope.index", - "Levenshtein_search==1.4.5", - "typing_extensions", -] - setup( - name="dedupe", - url="https://github.com/dedupeio/dedupe", - version="2.0.17", - author="Forest Gregg", - author_email="fgregg@datamade.us", - description="A python library for accurate and scaleable data deduplication and entity-resolution", - packages=["dedupe", "dedupe.variables"], - ext_modules=cythonize( - [Extension("dedupe.cpredicates", ["dedupe/cpredicates.pyx"])] - ), - install_requires=install_requires, - python_requires=">=3.7", - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX", - "Programming Language :: Cython", - "Programming Language :: Python :: 3", - 
"Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Information Analysis", - ], - long_description=""" - dedupe is a library that uses machine learning to perform de-duplication and entity resolution quickly on structured data. dedupe is the open source engine for `dedupe.io `_ - - **dedupe** will help you: - - * **remove duplicate entries** from a spreadsheet of names and addresses - * **link a list** with customer information to another with order history, even without unique customer id's - * take a database of campaign contributions and **figure out which ones were made by the same person**, even if the names were entered slightly differently for each record - - dedupe takes in human training data and comes up with the best rules for your dataset to quickly and automatically find similar records, even with very large databases. - """, # noqa: E501 - project_urls={ - "Issues": "https://github.com/dedupeio/dedupe/issues", - "Documentation": "https://docs.dedupe.io/en/latest/", - "Examples": "https://github.com/dedupeio/dedupe-examples", - "Twitter": "https://twitter.com/DedupeIo", - "Changelog": "https://github.com/dedupeio/dedupe/blob/main/CHANGELOG.md", - "Mailing List": "https://groups.google.com/forum/#!forum/open-source-deduplication", - }, + ext_modules=cythonize([Extension("dedupe.cpredicates", ["dedupe/cpredicates.pyx"])]) ) From 3ad797752bf43834c4664b1338f6e60ccd280b86 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 11 Aug 2022 14:43:44 -0800 Subject: [PATCH 2/3] Lock to setuptools 63 for building Testing if we're getting hit by https://github.com/pypa/setuptools/issues/3504? 
--- pyproject.toml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d718c0184..4bcbaab3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ MailingList = "https://groups.google.com/forum/#!forum/open-source-deduplication [build-system] -requires = ["setuptools>=64", +requires = ["setuptools==63", "wheel", "cython"] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 18b760eab..4482947b5 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,6 @@ from Cython.Build import cythonize - setup( ext_modules=cythonize([Extension("dedupe.cpredicates", ["dedupe/cpredicates.pyx"])]) ) From a6812e8fc9cde233ca197e7222b933942c9b30e5 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Fri, 2 Sep 2022 11:33:59 -0800 Subject: [PATCH 3/3] Rename setup.cfg to .flake8 flake8 seems to pick up this config, if I set max-line-length to 10 then it fails --- setup.cfg => .flake8 | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename setup.cfg => .flake8 (100%) diff --git a/setup.cfg b/.flake8 similarity index 100% rename from setup.cfg rename to .flake8