Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclude packages #142

Merged
merged 15 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ When using `requirements.txt` files to specify dependencies, there is no way to

Yes, you can specify a list of sections after the `--sections` argument. It all depends on what your setup looks like and what you set out to achieve.

### Can I exclude dependencies from the scan?

Yes, you can use the `--exclude-deps` argument to specify one or more dependencies you do not wish to get warnings for.

This feature is meant to be used for dependencies you must specify in your dependencies spec file, but which you don't necessarily import in your source code. Database drivers are an example of such dependencies: they are commonly only referenced in connection strings, which signal to the ORM which driver to use.

### Can I run Creosote in a GitHub Action workflow?

Yes, please see the `action` job example in [`.github/workflows/test.yml`](.github/workflows/test.yml).
Expand Down
43 changes: 38 additions & 5 deletions src/creosote/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import glob
import sys
from typing import List

from loguru import logger

Expand Down Expand Up @@ -72,11 +73,36 @@ def parse_args(args):
help="pyproject.toml section(s) to scan for dependencies",
)

parser.add_argument(
"--exclude-deps",
dest="exclude_deps",
metavar="PACKAGE",
nargs="*",
default=[],
help="Exclude dependencies from the scan",
)

parsed_args = parser.parse_args(args)

return parsed_args


def excluded_packages_not_installed(
    excluded_packages: List[str], venv: str
) -> List[str]:
    """Return the excluded packages that are not found in the virtual environment.

    A warning listing the missing packages is logged when at least one
    excluded package is not found.

    Args:
        excluded_packages: Package names given via ``--exclude-deps``.
        venv: Path to the virtual environment to inspect.

    Returns:
        The names from ``excluded_packages`` which are not reported by
        ``parsers.get_installed_packages``, in their original order.
    """
    if not excluded_packages:
        # Nothing was excluded, so there is no need to touch the venv at all.
        return []

    # Scan the virtual environment once instead of once per excluded package.
    installed_packages = set(parsers.get_installed_packages(venv))
    missing_packages = [
        package for package in excluded_packages if package not in installed_packages
    ]

    if missing_packages:
        logger.warning(
            "Excluded packages not found in virtual environment: "
            f"{', '.join(missing_packages)}"
        )
    return missing_packages


def main(args_=None):
args = parse_args(args_)

Expand All @@ -96,15 +122,19 @@ def main(args_=None):
logger.debug(f"- {imp}")

logger.debug(f"Parsing {args.deps_file} for packages...")
deps_reader = parsers.PackageReader()
deps_reader.read(args.deps_file, args.sections)
deps_reader = parsers.DependencyReader(
deps_file=args.deps_file,
sections=args.sections,
exclude_deps=args.exclude_deps,
)
dependency_names = deps_reader.read()

logger.debug(f"Packages found in {args.deps_file}:")
for package in deps_reader.packages:
for package in dependency_names:
logger.debug(f"- {package}")

deps_resolver = resolvers.DepsResolver(
imports=imports, packages=deps_reader.packages or [], venv=args.venv
imports=imports, packages=dependency_names, venv=args.venv
)
deps_resolver.resolve()

Expand All @@ -115,7 +145,10 @@ def main(args_=None):
for package in deps_resolver.packages:
logger.debug(f"- {package}")

unused_packages = deps_resolver.get_unused_package_names()
unused_packages = sorted(
deps_resolver.get_unused_package_names()
+ excluded_packages_not_installed(args.exclude_deps, args.venv)
)
formatters.print_results(unused_packages=unused_packages, format_=args.format)
return 1 if unused_packages else 0 # exit code

Expand Down
4 changes: 2 additions & 2 deletions src/creosote/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ class Import:


@dataclasses.dataclass
class Package:
name: str
class PackageInfo:
name: str # as defined in the dependencies specification file
top_level_import_names: Optional[List[str]] = None
distlib_db_import_name: Optional[str] = None
canonicalized_package_name: Optional[str] = None
Expand Down
112 changes: 62 additions & 50 deletions src/creosote/parsers.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,59 @@
import ast
import pathlib
import re
from functools import lru_cache
from typing import Any, Dict, List, cast

import toml
from dotty_dict import Dotty, dotty
from loguru import logger
from pip_requirements_parser import RequirementsFile

from creosote.models import Import, Package
from creosote.models import Import


class PackageReader:
def __init__(self):
self.packages = []
class DependencyReader:
"""Read dependencies from various dependency file formats."""

def pyproject_pep621(self, section_contents: List[str]):
def __init__(
self,
deps_file: str,
sections: List[str],
exclude_deps: List[str],
) -> None:
always_excluded_packages = ["python"] # occurs in Poetry setup

self.deps_file = deps_file
self.sections = sections
self.exclude_deps = exclude_deps + always_excluded_packages

def read(self) -> List[str]:
    """Read the dependency names from the dependency specification file.

    Files ending in ``.toml`` are loaded with ``load_pyproject``; files
    ending in ``.txt`` or ``.in`` are loaded with ``load_requirements``.
    Names listed in ``self.exclude_deps``, as well as ``"python"``, are
    removed from the result.

    Returns:
        The dependency names that were found and not excluded.

    Raises:
        FileNotFoundError: If ``self.deps_file`` does not exist.
        NotImplementedError: If the file extension is not supported.
    """
    if not pathlib.Path(self.deps_file).exists():
        raise FileNotFoundError(f"File {self.deps_file} does not exist")

    if self.deps_file.endswith(".toml"):  # pyproject.toml expected
        found_names = self.load_pyproject(self.deps_file, self.sections)
    elif self.deps_file.endswith((".txt", ".in")):
        found_names = self.load_requirements(self.deps_file)
    else:
        raise NotImplementedError(
            f"Dependency specs file {self.deps_file} is not supported."
        )

    # "python" occurs in Poetry setup; always drop it alongside the
    # user-supplied exclusions. A set keeps each membership test cheap.
    packages_to_exclude = {"python", *self.exclude_deps}
    dependency_names = [
        name for name in found_names if name not in packages_to_exclude
    ]

    logger.info(
        f"Found packages in {self.deps_file}: {', '.join(dependency_names)}"
    )

    return dependency_names

def load_pyproject_pep621(self, section_contents: List[str]):
if not isinstance(section_contents, list):
raise TypeError("Unexpected dependency format, list expected.")

Expand All @@ -26,13 +63,13 @@ def pyproject_pep621(self, section_contents: List[str]):
section_deps.append(parsed_dep)
return section_deps

def pyproject_poetry(self, section_contents: Dict[str, Any]):
def load_pyproject_poetry(self, section_contents: Dict[str, Any]):
    """Return the keys of a Poetry-style dependency table.

    Raises:
        TypeError: If ``section_contents`` is not a dict.
    """
    if isinstance(section_contents, dict):
        return section_contents.keys()
    raise TypeError("Unexpected dependency format, dict expected.")

def pyproject(self, deps_file: str, sections: List[str]):
"""Return dependencies from pyproject.toml."""
def load_pyproject(self, deps_file: str, sections: List[str]):
"""Read dependency names from pyproject.toml."""
with open(deps_file, "r", encoding="utf-8") as infile:
contents = toml.loads(infile.read())

Expand All @@ -50,16 +87,16 @@ def pyproject(self, deps_file: str, sections: List[str]):
section_deps = []
if section.startswith("project"):
logger.debug(f"Detected PEP-621 toml section in {deps_file}")
section_deps = self.pyproject_pep621(section_contents)
section_deps = self.load_pyproject_pep621(section_contents)
elif section.startswith("packages") or section.startswith("dev-packages"):
logger.debug(f"Detected pipenv/Pipfile toml section in {deps_file}")
section_deps = self.pyproject_pep621(section_contents)
section_deps = self.load_pyproject_pep621(section_contents)
elif section.startswith("tool.pdm"):
logger.debug(f"Detected PDM toml section in {deps_file}")
section_deps = self.pyproject_pep621(section_contents)
section_deps = self.load_pyproject_pep621(section_contents)
elif section.startswith("tool.poetry"):
logger.debug(f"Detected Poetry toml section in {deps_file}")
section_deps = self.pyproject_poetry(cast(dict, section_contents))
section_deps = self.load_pyproject_poetry(cast(dict, section_contents))
else:
raise TypeError("Unsupported dependency format.")

Expand All @@ -70,17 +107,17 @@ def pyproject(self, deps_file: str, sections: List[str]):

return sorted(deps)

def requirements(self, deps_file: str):
"""Return dependencies from requirements.txt-format file."""
def load_requirements(self, deps_file: str) -> List[str]:
    """Return the sorted dependency names from a requirements.txt-format file.

    Requirements for which the parser reports no name are skipped.
    """
    parsed_file = RequirementsFile.from_file(deps_file)
    names = [
        requirement.name
        for requirement in parsed_file.requirements
        if requirement.name is not None
    ]
    names.sort()
    return names

@staticmethod
def parse_dep_string(dep: str):
if "@" in dep:
return PackageReader.dependency_without_direct_reference(dep)
return DependencyReader.dependency_without_direct_reference(dep)
else:
return PackageReader.dependency_without_version_constraint(dep)
return DependencyReader.dependency_without_version_constraint(dep)

@staticmethod
def dependency_without_version_constraint(dependency_string: str):
Expand All @@ -104,39 +141,6 @@ def dependency_without_direct_reference(dependency_string: str):
dep = match.groups()[0]
return dep

@lru_cache(maxsize=None) # noqa: B019
def ignore_packages(self):
return ["python"]

def filter_ignored_dependencies(self, deps):
packages = []
for dep in deps:
if dep not in self.ignore_packages():
packages.append(Package(name=dep))
return packages

def read(self, deps_file: str, sections: List[str]):
if not pathlib.Path(deps_file).exists():
raise Exception(f"File {deps_file} does not exist")

if deps_file.endswith(".toml"): # pyproject.toml expected
self.packages = self.filter_ignored_dependencies(
self.pyproject(deps_file, sections)
)
elif deps_file.endswith(".txt") or deps_file.endswith(".in"):
self.packages = self.filter_ignored_dependencies(
self.requirements(deps_file)
)
else:
raise NotImplementedError(
f"Dependency specs file {deps_file} is not supported."
)

logger.info(
f"Found packages in {deps_file}: "
f"{', '.join([pkg.name for pkg in self.packages])}"
)


def get_module_info_from_code(path):
"""Get imports, based on given filepath.
Expand Down Expand Up @@ -180,3 +184,11 @@ def get_modules_from_code(paths):
dupes_removed.append(imp)

return dupes_removed


def get_installed_packages(venv):
    """Return the package names of all ``*.dist-info`` entries in a venv.

    The first ``site-packages`` directory found anywhere below ``venv`` is
    searched recursively for ``*.dist-info`` entries. The package name is
    the part of the entry name before the first ``-``; it is returned
    exactly as spelled in that name, without any normalization.

    Args:
        venv: Path to the virtual environment.

    Returns:
        A list of package names, or an empty list when no ``site-packages``
        directory exists below ``venv``.
    """
    # next() with a default avoids leaking a bare StopIteration to the
    # caller when the venv contains no site-packages directory.
    site_packages = next(pathlib.Path(venv).glob("**/site-packages"), None)
    if site_packages is None:
        return []

    return [
        dist_info.name.split("-")[0]
        for dist_info in site_packages.glob("**/*.dist-info")
    ]
26 changes: 14 additions & 12 deletions src/creosote/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,26 @@
from distlib import database
from loguru import logger

from creosote.models import Import, Package
from creosote.models import Import, PackageInfo


class DepsResolver:
def __init__(
self,
imports: List[Import],
packages: List[Package],
packages: List[str],
venv: str,
):
self.imports = imports
self.packages = packages
self.packages = [PackageInfo(name=package) for package in packages]
self.venv = venv

self.map_package_to_import_via_top_level_txt_file
self.top_level_package_pattern = re.compile(
r"\/([\w]*).[\d\.]*.dist-info\/top_level.txt"
)

self.unused_packages: List[Package] = []
self.unused_packages: List[PackageInfo] = []

@staticmethod
def canonicalize_module_name(module_name: str):
Expand Down Expand Up @@ -54,7 +54,9 @@ def gather_top_level_filepaths(self):
for top_level_filepath in self.top_level_filepaths:
logger.debug(f"Found {top_level_filepath}")

def map_package_to_import_via_top_level_txt_file(self, package: Package) -> bool:
def map_package_to_import_via_top_level_txt_file(
self, package: PackageInfo
) -> bool:
"""Return True if import name was found in the top_level.txt."""
package_name = self.canonicalize_module_name(package.name)

Expand All @@ -67,14 +69,14 @@ def map_package_to_import_via_top_level_txt_file(self, package: Package) -> bool
package.top_level_import_names = [line.strip() for line in lines]
import_names = ",".join(package.top_level_import_names)
logger.debug(
f"[{package.name}] found import name via top_level.txt: "
f"{import_names} ⭐️"
f"[{package.name}] found import name "
f"via top_level.txt: {import_names} ⭐️"
)
return True
logger.debug(f"[{package.name}] did not find top_level.txt in venv")
return False

def map_package_to_module_via_distlib(self, package: Package) -> bool:
def map_package_to_module_via_distlib(self, package: PackageInfo) -> bool:
"""Fallback to distlib if we can't find the top_level.txt file.

It seems this brings very little value right now, but I'll
Expand Down Expand Up @@ -103,7 +105,7 @@ def map_package_to_module_via_distlib(self, package: Package) -> bool:
break

logger.debug(
f"[{package.name}] found import name via distlib.database: {module} 🤞"
f"[{package.name}] found import name " f"via distlib.database: {module} 🤞"
)
package.distlib_db_import_name = module
return True
Expand Down Expand Up @@ -147,11 +149,11 @@ def gather_import_info(self):
)
if not found_import_name:
logger.debug(
f"[{package.name}] relying on canonicalization fallback: "
f"{package.canonicalized_package_name } 🤞"
f"[{package.name}] relying on canonicalization "
f"fallback: {package.canonicalized_package_name } 🤞"
)

def associate_package_with_import(self, package: Package, import_name: str):
def associate_package_with_import(self, package: PackageInfo, import_name: str):
for imp in self.imports.copy():
if not imp.module and import_name in imp.name: # noqa: SIM114
# import <imp.name>
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from creosote.parsers import PackageReader
from creosote.parsers import DependencyReader


@pytest.mark.parametrize(
Expand All @@ -15,7 +15,7 @@
],
)
def test_dependency_without_version_constraint(dependency_string, expected_package):
assert expected_package == PackageReader.dependency_without_version_constraint(
assert expected_package == DependencyReader.dependency_without_version_constraint(
dependency_string
)

Expand All @@ -30,6 +30,6 @@ def test_dependency_without_version_constraint(dependency_string, expected_packa
],
)
def test_pyproject_directref_package_name(dependency_string, expected_package):
assert expected_package == PackageReader.dependency_without_direct_reference(
assert expected_package == DependencyReader.dependency_without_direct_reference(
dependency_string
)