Skip to content

Commit

Permalink
feat: find Python files in Rust (#591)
Browse files Browse the repository at this point in the history
  • Loading branch information
mkniewallner committed Mar 24, 2024
1 parent 4f697a1 commit b58868b
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 136 deletions.
87 changes: 87 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ crate-type = ["cdylib"]
[dependencies]
chardetng = "0.1.17"
encoding_rs = "0.8.33"
ignore = "0.4.22"
log = "0.4.21"
path-slash = "0.2.1"
pyo3 = { version = "0.20.3", features = ["abi3-py38"] }
pyo3-log = "0.9.0"
rayon = "1.9.0"
Expand Down
2 changes: 1 addition & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ classifiers = [
]
dependencies = [
"click>=8.0.0,<9",
"pathspec>=0.9.0",
"colorama>=0.4.6; sys_platform == 'win32'",
"tomli>=2.0.1; python_version < '3.11'"
]
Expand Down
22 changes: 16 additions & 6 deletions python/deptry/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from deptry.exceptions import IncorrectDependencyFormatError, UnsupportedPythonVersionError
from deptry.imports.extract import get_imported_modules_from_list_of_files
from deptry.module import ModuleBuilder, ModuleLocations
from deptry.python_file_finder import PythonFileFinder
from deptry.python_file_finder import get_all_python_files_in
from deptry.reporters import JSONReporter, TextReporter
from deptry.stdlibs import STDLIBS_PYTHON
from deptry.violations import (
Expand Down Expand Up @@ -65,10 +65,7 @@ def run(self) -> None:

self._log_dependencies(dependencies_extract)

all_python_files = PythonFileFinder(
self.exclude, self.extend_exclude, self.using_default_exclude, self.ignore_notebooks
).get_all_python_files_in(self.root)

python_files = self._find_python_files()
local_modules = self._get_local_modules()
stdlib_modules = self._get_stdlib_modules()

Expand All @@ -83,7 +80,7 @@ def run(self) -> None:
).build(),
locations,
)
for module, locations in get_imported_modules_from_list_of_files(all_python_files).items()
for module, locations in get_imported_modules_from_list_of_files(python_files).items()
]
imported_modules_with_locations = [
module_with_locations
Expand All @@ -99,6 +96,19 @@ def run(self) -> None:

self._exit(violations)

def _find_python_files(self) -> list[Path]:
    """Collect the Python files that should be scanned for imports.

    Discovery is delegated to ``get_all_python_files_in``, driven by this
    instance's ``root``, ``exclude``, ``extend_exclude``,
    ``using_default_exclude`` and ``ignore_notebooks`` settings. A debug
    message is logged before the search and the resulting paths are logged
    (one per line) afterwards.

    Returns:
        The list of paths produced by ``get_all_python_files_in``.
    """
    logging.debug("Collecting Python files to scan...")

    discovered = get_all_python_files_in(
        self.root,
        self.exclude,
        self.extend_exclude,
        self.using_default_exclude,
        self.ignore_notebooks,
    )

    # One path per line keeps the debug output readable for large projects.
    listing = "\n".join(map(str, discovered))
    logging.debug("Python files to scan for imports:\n%s\n", listing)

    return discovered

def _find_violations(
self, imported_modules_with_locations: list[ModuleLocations], dependencies: list[Dependency]
) -> list[Violation]:
Expand Down
85 changes: 12 additions & 73 deletions python/deptry/python_file_finder.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,18 @@
from __future__ import annotations

import logging
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Pattern

from pathspec import PathSpec
from deptry.rust import find_python_files


@dataclass
class PythonFileFinder:
    """
    Recursively collect ``.py`` and ``.ipynb`` files below one or more directories.

    Args:
        exclude: Regex patterns; any path matched by one of them is skipped.
        extend_exclude: Extra regex patterns, combined with ``exclude``.
        using_default_exclude: Whether the default exclude list is in use. The ``.gitignore`` file is only
            consulted when this is True.
        ignore_notebooks: When True, ``.ipynb`` files are skipped and only ``.py`` files are returned.
    """

    exclude: tuple[str, ...]
    extend_exclude: tuple[str, ...]
    using_default_exclude: bool
    ignore_notebooks: bool = False

    def get_all_python_files_in(self, directories: tuple[Path, ...]) -> list[Path]:
        """Walk every directory in ``directories`` and return the files that are not ignored."""
        logging.debug("Collecting Python files to scan...")

        collected = set()

        # All exclusion patterns are folded into a single alternation, compiled once.
        ignore_regex = re.compile("|".join(self.exclude + self.extend_exclude))

        if self.ignore_notebooks:
            file_lookup_suffixes = {".py"}
        else:
            file_lookup_suffixes = {".py", ".ipynb"}

        # The `.gitignore` is looked up relative to the current working directory.
        gitignore_spec = self._generate_gitignore_pathspec(Path())

        for directory in directories:
            for current_str, subdirectories, filenames in os.walk(directory, topdown=True):
                current = Path(current_str)

                if self._is_directory_ignored(current, ignore_regex):
                    # Emptying the list in place stops `os.walk` from descending any further.
                    subdirectories.clear()
                    continue

                collected.update(
                    candidate
                    for candidate in (current / filename for filename in filenames)
                    if not self._is_file_ignored(candidate, file_lookup_suffixes, ignore_regex, gitignore_spec)
                )

        result = list(collected)

        logging.debug("Python files to scan for imports:\n%s\n", "\n".join(map(str, result)))

        return result

    def _is_directory_ignored(self, directory: Path, ignore_regex: Pattern[str]) -> bool:
        """Tell whether ``directory`` matches one of the exclusion patterns (anchored at the path start)."""
        # Without any pattern the compiled regex is empty and would match everything, so bail out first.
        if not (self.exclude + self.extend_exclude):
            return False

        return ignore_regex.match(str(directory)) is not None

    def _is_file_ignored(
        self, file: Path, file_lookup_suffixes: set[str], ignore_regex: Pattern[str], gitignore_spec: PathSpec | None
    ) -> bool:
        """Tell whether ``file`` has an unwanted suffix, matches an exclusion pattern or is git-ignored."""
        if file.suffix not in file_lookup_suffixes:
            return True

        if (self.exclude + self.extend_exclude) and ignore_regex.match(file.as_posix()):
            return True

        return bool(gitignore_spec and gitignore_spec.match_file(file))

    def _generate_gitignore_pathspec(self, directory: Path) -> PathSpec | None:
        """Build a ``PathSpec`` from ``directory / ".gitignore"``, or return None when it does not apply."""
        # If `exclude` is explicitly set, `.gitignore` is not taken into account.
        if self.using_default_exclude:
            try:
                with (directory / ".gitignore").open() as handle:
                    return PathSpec.from_lines("gitwildmatch", handle)
            except FileNotFoundError:
                # No `.gitignore` available: nothing to take into account.
                return None

        return None
def get_all_python_files_in(
    directories: tuple[Path, ...],
    exclude: tuple[str, ...],
    extend_exclude: tuple[str, ...],
    using_default_exclude: bool,
    ignore_notebooks: bool = False,
) -> list[Path]:
    """Return the files found by ``find_python_files`` as ``Path`` objects.

    All arguments are forwarded unchanged to ``find_python_files`` (imported
    from ``deptry.rust``); each entry it returns is wrapped in a ``Path``.
    """
    found = find_python_files(directories, exclude, extend_exclude, using_default_exclude, ignore_notebooks)
    return list(map(Path, found))
9 changes: 9 additions & 0 deletions python/deptry/rust.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
from pathlib import Path

from .rust import Location as RustLocation

def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
# Type stub for the file finder exposed by the Rust extension module (it is
# registered on the module in src/lib.rs). Results are plain strings; the
# Python wrapper `get_all_python_files_in` converts each one to a `Path`.
# NOTE(review): the arguments presumably mirror the former `PythonFileFinder`
# options (regex exclusions, `.gitignore` handling, notebook filtering);
# confirm against the Rust implementation.
def find_python_files(
    directories: tuple[Path, ...],
    exclude: tuple[str, ...],
    extend_exclude: tuple[str, ...],
    using_default_exclude: bool,
    ignore_notebooks: bool = False,
) -> list[str]: ...

class Location:
file: str
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use pyo3::prelude::*;
mod file_utils;
mod imports;
mod location;
mod python_file_finder;
mod visitor;

use location::Location;
Expand All @@ -18,6 +19,7 @@ fn rust(_py: Python, m: &PyModule) -> PyResult<()> {
imports::ipynb::get_imports_from_ipynb_files,
m
)?)?;
m.add_function(wrap_pyfunction!(python_file_finder::find_python_files, m)?)?;
m.add_class::<Location>()?;
Ok(())
}
Loading

0 comments on commit b58868b

Please sign in to comment.