Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,38 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.12] - 2025-07-21

### Changed
- **BREAKING CHANGE**: Refactored `Codeanalyzer` constructor to use `AnalysisOptions` dataclass [in response to #12](https://github.com/codellm-devkit/codeanalyzer-python/issues/12)
- Replaced multiple individual parameters with single `AnalysisOptions` object for cleaner API
- Improved type safety and configuration management through centralized options structure
- Enhanced maintainability and extensibility for future configuration additions
- Updated CLI integration to create and pass `AnalysisOptions` instance
- Preserved existing analysis behavior and CLI options (only the `Codeanalyzer` constructor signature changed) while improving code architecture

### Added
- New `AnalysisOptions` dataclass in `codeanalyzer.options` module [in response to #12](https://github.com/codellm-devkit/codeanalyzer-python/issues/12)
- Centralized configuration structure with all analysis parameters
- Type-safe configuration with proper defaults and validation
- Support for `OutputFormat` enum integration
- Clean separation between CLI and library configuration handling

### Technical Details
- Added new `codeanalyzer.options` package with `AnalysisOptions` dataclass
- Updated `Codeanalyzer.__init__()` to accept single `options` parameter instead of 9 individual parameters
- Modified CLI handler in `__main__.py` to create `AnalysisOptions` instance from command line arguments
- Improved code organization and maintainability for configuration management
- Enhanced API design following best practices for parameter object patterns

## [0.1.11] - 2025-07-21

### Fixed
- **CRITICAL**: Fixed NumPy build failure on Python 3.12+ (addresses [#19](https://github.com/codellm-devkit/codeanalyzer-python/issues/19))
- Updated NumPy dependency constraints to handle Python 3.12+ compatibility
- Split NumPy version constraints into three tiers:
- `numpy>=1.21.0,<1.24.0` for Python < 3.11
- `numpy>=1.24.0,<2.0.0` for Python 3.11.x
- `numpy>=1.26.0,<2.0.0` for Python 3.12+ (requires NumPy 1.26+ which supports Python 3.12)
- Resolves `ModuleNotFoundError: No module named 'distutils'` errors on Python 3.12+
- Ensures compatibility with Python 3.12 which removed `distutils` from the standard library
Expand Down
126 changes: 40 additions & 86 deletions codeanalyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,107 +7,62 @@
from codeanalyzer.utils import _set_log_level, logger
from codeanalyzer.config import OutputFormat
from codeanalyzer.schema import model_dump_json
from codeanalyzer.options import AnalysisOptions

def main(
input: Annotated[
Path, typer.Option("-i", "--input", help="Path to the project root directory.")
],
output: Annotated[
Optional[Path],
typer.Option("-o", "--output", help="Output directory for artifacts."),
] = None,
format: Annotated[
OutputFormat,
typer.Option(
"-f",
"--format",
help="Output format: json or msgpack.",
case_sensitive=False,
),
] = OutputFormat.JSON,
analysis_level: Annotated[
int,
typer.Option("-a", "--analysis-level", help="1: symbol table, 2: call graph."),
] = 1,
using_codeql: Annotated[
bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
] = False,
using_ray: Annotated[
bool,
typer.Option(
"--ray/--no-ray", help="Enable Ray for distributed analysis."
),
] = False,
rebuild_analysis: Annotated[
bool,
typer.Option(
"--eager/--lazy",
help="Enable eager or lazy analysis. Defaults to lazy.",
),
] = False,
skip_tests: Annotated[
bool,
typer.Option(
"--skip-tests/--include-tests",
help="Skip test files in analysis.",
),
] = True,
file_name: Annotated[
Optional[Path],
typer.Option(
"--file-name",
help="Analyze only the specified file (relative to input directory).",
),
] = None,
cache_dir: Annotated[
Optional[Path],
typer.Option(
"-c",
"--cache-dir",
help="Directory to store analysis cache. Defaults to '.codeanalyzer' in the input directory.",
),
] = None,
clear_cache: Annotated[
bool,
typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis. By default, cache is retained."),
] = False,
verbosity: Annotated[
int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
] = 0,
input: Annotated[Path, typer.Option("-i", "--input", help="Path to the project root directory.")],
output: Optional[Path] = typer.Option(None, "-o", "--output"),
format: OutputFormat = typer.Option(OutputFormat.JSON, "-f", "--format"),
analysis_level: int = typer.Option(1, "-a", "--analysis-level"),
using_codeql: bool = typer.Option(False, "--codeql/--no-codeql"),
using_ray: bool = typer.Option(False, "--ray/--no-ray"),
rebuild_analysis: bool = typer.Option(False, "--eager/--lazy"),
skip_tests: bool = typer.Option(True, "--skip-tests/--include-tests"),
file_name: Optional[Path] = typer.Option(None, "--file-name"),
cache_dir: Optional[Path] = typer.Option(None, "-c", "--cache-dir"),
clear_cache: bool = typer.Option(False, "--clear-cache/--keep-cache"),
verbosity: int = typer.Option(0, "-v", count=True),
):
"""Static Analysis on Python source code using Jedi, Astroid, and Treesitter."""
_set_log_level(verbosity)
options = AnalysisOptions(
input=input,
output=output,
format=format,
analysis_level=analysis_level,
using_codeql=using_codeql,
using_ray=using_ray,
rebuild_analysis=rebuild_analysis,
skip_tests=skip_tests,
file_name=file_name,
cache_dir=cache_dir,
clear_cache=clear_cache,
verbosity=verbosity,
)

if not input.exists():
logger.error(f"Input path '{input}' does not exist.")
_set_log_level(options.verbosity)
if not options.input.exists():
logger.error(f"Input path '{options.input}' does not exist.")
raise typer.Exit(code=1)

# Validate file_name if provided
if file_name is not None:
full_file_path = input / file_name
if options.file_name is not None:
full_file_path = options.input / options.file_name
if not full_file_path.exists():
logger.error(f"Specified file '{file_name}' does not exist in '{input}'.")
logger.error(f"Specified file '{options.file_name}' does not exist in '{options.input}'.")
raise typer.Exit(code=1)
if not full_file_path.is_file():
logger.error(f"Specified path '{file_name}' is not a file.")
logger.error(f"Specified path '{options.file_name}' is not a file.")
raise typer.Exit(code=1)
if not str(file_name).endswith('.py'):
logger.error(f"Specified file '{file_name}' is not a Python file (.py).")
if not str(options.file_name).endswith('.py'):
logger.error(f"Specified file '{options.file_name}' is not a Python file (.py).")
raise typer.Exit(code=1)

with Codeanalyzer(
input, analysis_level, skip_tests, using_codeql, rebuild_analysis, cache_dir, clear_cache, using_ray, file_name
) as analyzer:
with Codeanalyzer(options) as analyzer:
artifacts = analyzer.analyze()

# Handle output based on format
if output is None:
# Output to stdout (only for JSON)
if options.output is None:
print(model_dump_json(artifacts, separators=(",", ":")))
else:
# Output to file
output.mkdir(parents=True, exist_ok=True)
_write_output(artifacts, output, format)
options.output.mkdir(parents=True, exist_ok=True)
_write_output(artifacts, options.output, options.format)


def _write_output(artifacts, output_dir: Path, format: OutputFormat):
Expand All @@ -130,7 +85,6 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
f"Compression ratio: {artifacts.get_compression_ratio():.1%} of JSON size"
)


app = typer.Typer(
callback=main,
name="codeanalyzer",
Expand Down
40 changes: 13 additions & 27 deletions codeanalyzer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from codeanalyzer.syntactic_analysis.exceptions import SymbolTableBuilderRayError
from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
from codeanalyzer.utils import ProgressBar
from codeanalyzer.options import AnalysisOptions

@ray.remote
def _process_file_with_ray(py_file: Union[Path, str], project_dir: Union[Path, str], virtualenv: Union[Path, str, None]) -> Dict[str, PyModule]:
Expand Down Expand Up @@ -43,40 +44,25 @@ class Codeanalyzer:
"""Core functionality for CodeQL analysis.

Args:
project_dir (Union[str, Path]): The root directory of the project to analyze.
virtualenv (Optional[Path]): Path to the virtual environment directory.
using_codeql (bool): Whether to use CodeQL for analysis.
rebuild_analysis (bool): Whether to force rebuild the database.
clear_cache (bool): Whether to delete the cached directory after analysis.
analysis_depth (int): Depth of analysis (reserved for future use).
options (AnalysisOptions): Analysis configuration options containing all necessary parameters.
"""

def __init__(
self,
project_dir: Union[str, Path],
analysis_depth: int,
skip_tests: bool,
using_codeql: bool,
rebuild_analysis: bool,
cache_dir: Optional[Path],
clear_cache: bool,
using_ray: bool,
file_name: Optional[Path] = None,
) -> None:
self.analysis_depth = analysis_depth
self.project_dir = Path(project_dir).resolve()
self.skip_tests = skip_tests
self.using_codeql = using_codeql
self.rebuild_analysis = rebuild_analysis
def __init__(self, options: AnalysisOptions) -> None:
self.options = options
self.analysis_depth = options.analysis_level
self.project_dir = Path(options.input).resolve()
self.skip_tests = options.skip_tests
self.using_codeql = options.using_codeql
self.rebuild_analysis = options.rebuild_analysis
self.cache_dir = (
cache_dir.resolve() if cache_dir is not None else self.project_dir
options.cache_dir.resolve() if options.cache_dir is not None else self.project_dir
) / ".codeanalyzer"
self.clear_cache = clear_cache
self.clear_cache = options.clear_cache
self.db_path: Optional[Path] = None
self.codeql_bin: Optional[Path] = None
self.virtualenv: Optional[Path] = None
self.using_ray: bool = using_ray
self.file_name: Optional[Path] = file_name
self.using_ray: bool = options.using_ray
self.file_name: Optional[Path] = options.file_name

@staticmethod
def _cmd_exec_helper(
Expand Down
3 changes: 3 additions & 0 deletions codeanalyzer/options/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .options import AnalysisOptions

__all__ = ["AnalysisOptions"]
25 changes: 25 additions & 0 deletions codeanalyzer/options/options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from enum import Enum


class OutputFormat(str, Enum):
    """Serialization formats selectable for analysis output.

    Members subclass ``str`` so they compare equal to their literal values.
    """

    # NOTE(review): the CLI imports ``OutputFormat`` from
    # ``codeanalyzer.config``; confirm whether this second definition is
    # intentional or should be replaced by a re-export.
    JSON = "json"
    MSGPACK = "msgpack"


@dataclass
class AnalysisOptions:
    """Configuration bundle handed to ``Codeanalyzer`` in place of separate arguments.

    Only ``input`` is required; every other field has a default.

    Path-like fields (``input``, ``output``, ``file_name``, ``cache_dir``)
    are normalized to ``pathlib.Path`` after construction, so callers may
    pass plain strings.
    """

    # Project root directory to analyze.
    input: Path
    # Directory for output artifacts; None means no output directory was given.
    output: Optional[Path] = None
    # Serialization format for the output.
    format: OutputFormat = OutputFormat.JSON
    # Analysis level (the CLI documents 1 as symbol table, 2 as call graph).
    analysis_level: int = 1
    # Whether CodeQL-based analysis is enabled.
    using_codeql: bool = False
    # Whether Ray is enabled for distributed analysis.
    using_ray: bool = False
    # Set from the CLI's --eager/--lazy flag (default: lazy, i.e. False).
    rebuild_analysis: bool = False
    # Whether test files are skipped during analysis.
    skip_tests: bool = True
    # Optional single file to analyze, relative to ``input``.
    file_name: Optional[Path] = None
    # Optional directory for the analysis cache.
    cache_dir: Optional[Path] = None
    # Whether the cache is cleared after analysis.
    clear_cache: bool = False
    # Verbosity level; the CLI counts -v flags.
    verbosity: int = 0

    def __post_init__(self) -> None:
        """Coerce path-like fields to ``Path`` so later ``Path`` method calls are safe."""
        self.input = Path(self.input)
        # Optional paths stay None when unset; only real values are coerced.
        for field_name in ("output", "file_name", "cache_dir"):
            value = getattr(self, field_name)
            if value is not None:
                setattr(self, field_name, Path(value))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "codeanalyzer-python"
version = "0.1.11"
version = "0.1.12"
description = "Static Analysis on Python source code using Jedi, CodeQL and Treesitter."
readme = "README.md"
authors = [
Expand Down
40 changes: 0 additions & 40 deletions test_numpy_constraints.py

This file was deleted.

Loading