diff --git a/cldk/analysis/python/__init__.py b/cldk/analysis/python/__init__.py index e95052c8..5a5f76f6 100644 --- a/cldk/analysis/python/__init__.py +++ b/cldk/analysis/python/__init__.py @@ -18,6 +18,6 @@ Python package """ -from .python import PythonAnalysis +from .python_analysis import PythonAnalysis __all__ = ["PythonAnalysis"] diff --git a/cldk/analysis/python/python.py b/cldk/analysis/python/python_analysis.py similarity index 76% rename from cldk/analysis/python/python.py rename to cldk/analysis/python/python_analysis.py index a0bf36a3..2c9f4608 100644 --- a/cldk/analysis/python/python.py +++ b/cldk/analysis/python/python_analysis.py @@ -18,10 +18,8 @@ Python module """ -from abc import ABC from pathlib import Path -from typing import Dict, List -from pandas import DataFrame +from typing import List from cldk.analysis import SymbolTable from cldk.analysis.python.treesitter import PythonSitter @@ -29,6 +27,8 @@ class PythonAnalysis(SymbolTable): + """Python Analysis Class""" + def __init__( self, analysis_backend: str, @@ -48,13 +48,13 @@ def __init__( # Initialize the analysis analysis_backend if analysis_backend.lower() == "codeql": - raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") + raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") elif analysis_backend.lower() == "codeanalyzer": - raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") + raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") elif analysis_backend.lower() == "treesitter": self.analysis_backend: PythonSitter = PythonSitter() else: - raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") + raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") def get_methods(self) -> List[PyMethod]: """ @@ -89,14 +89,14 @@ def get_method_details(self, method_signature: str) -> 
PyMethod: def is_parsable(self, source_code: str) -> bool: """ - Check if the code is parsable - Args: - source_code: source code + Check if the code is parsable + Args: + source_code: source code - Returns: - True if the code is parsable, False otherwise + Returns: + True if the code is parsable, False otherwise """ - return PythonSitter.is_parsable(self, source_code) + return self.analysis_backend.is_parsable(source_code) def get_raw_ast(self, source_code: str) -> str: """ @@ -107,9 +107,9 @@ def get_raw_ast(self, source_code: str) -> str: Returns: Tree: the raw AST """ - return PythonSitter.get_raw_ast(self, source_code) + return self.analysis_backend.get_raw_ast(source_code) - def get_imports(self) -> List[PyImport]: + def get_imports(self) -> List[PyImport]: """ Given an application or a source code, get all the imports """ @@ -119,7 +119,7 @@ def get_variables(self, **kwargs): """ Given an application or a source code, get all the variables """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_classes(self) -> List[PyClass]: """ @@ -131,34 +131,34 @@ def get_classes_by_criteria(self, **kwargs): """ Given an application or a source code, get all the classes given the inclusion and exclution criteria """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_sub_classes(self, **kwargs): """ Given an application or a source code, get all the sub-classes """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_nested_classes(self, **kwargs): """ Given an application or a source code, get all the nested classes """ - raise NotImplementedError(f"Support for this 
functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_constructors(self, **kwargs): """ Given an application or a source code, get all the constructors """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_methods_in_class(self, **kwargs): """ Given an application or a source code, get all the methods within the given class """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") def get_fields(self, **kwargs): """ Given an application or a source code, get all the fields """ - raise NotImplementedError(f"Support for this functionality has not been implemented yet.") + raise NotImplementedError("Support for this functionality has not been implemented yet.") diff --git a/tests/analysis/python/test_python.py b/tests/analysis/python/test_python.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/analysis/python/test_python_analysis.py b/tests/analysis/python/test_python_analysis.py new file mode 100644 index 00000000..09b8a7df --- /dev/null +++ b/tests/analysis/python/test_python_analysis.py @@ -0,0 +1,283 @@ +################################################################################ +# Copyright IBM Corporation 2025 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +""" +Python Tests +""" +import os +from typing import List +from tree_sitter import Tree +import pytest + +from cldk.analysis.python import PythonAnalysis +from cldk.utils.analysis_engine import AnalysisEngine +from cldk.models.python.models import PyClass, PyImport, PyMethod, PyModule + +PYTHON_CODE = """ +import os +from typing import List +from math import * + +def env(env_var: str) -> str: + return os.getenv(env_var) + +class Calculator(): + '''Calculator Class''' + + def __init__(self): + self._total = 0 + + @property + def total(self): + return self._total + + @total.setter + def total(self, value): + self._total = value + + def add(self, a, b): + self._total += a + b + return a + b + + def subtract(self, a, b): + self._total += a - b + return a - b + + def multiply(self, a, b): + self._total += (a * b) + return a * b + + def divide(self, a, b): + self._total += (a / b) + return a / b +""" + + +def test_not_implemented(): + """It should raise a not implemented exception""" + # test with CodeQL + with pytest.raises(NotImplementedError) as except_info: + _ = PythonAnalysis( + analysis_backend=AnalysisEngine.CODEQL, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + assert except_info.type == NotImplementedError + + # test with CodeAnalyzer + with pytest.raises(NotImplementedError) as except_info: + _ = PythonAnalysis( + analysis_backend=AnalysisEngine.CODEANALYZER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + assert except_info.type == NotImplementedError + + # Test with unknown backend + with pytest.raises(NotImplementedError) as except_info: + _ = PythonAnalysis(analysis_backend="unknown", eager_analysis=True, project_dir=None, 
source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None) + assert except_info.type == NotImplementedError + + +def test_get_methods(): + """It should return all of the methods""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + all_methods = python_analysis.get_methods() + assert all_methods is not None + assert isinstance(all_methods, List) + assert len(all_methods) == 7 + for method in all_methods: + assert isinstance(method, PyMethod) + + +def test_get_functions(): + """It should return all of the functions""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + all_functions = python_analysis.get_functions() + assert all_functions is not None + assert isinstance(all_functions, List) + assert len(all_functions) == 1 + for method in all_functions: + assert isinstance(method, PyMethod) + + +def test_get_all_modules(tmp_path): + """It should return all of the modules""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=tmp_path, source_code=None, analysis_backend_path=None, analysis_json_path=None + ) + + # set up some temporary modules + temp_file_path = os.path.join(tmp_path, "hello.py") + with open(temp_file_path, "w", encoding="utf-8") as hello_module: + hello_module.write('print("Hello, world!")') + temp_file_path = os.path.join(tmp_path, "bye.py") + with open(temp_file_path, "w", encoding="utf-8") as bye_module: + bye_module.write('print("Goodbye, world!")') + + all_modules = python_analysis.get_modules() + assert all_modules is not None + assert isinstance(all_modules, List) + assert len(all_modules) == 2 + for module in all_modules: + assert isinstance(module, 
PyModule) + + +def test_get_method_details(): + """It should return the method details""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + method_details = python_analysis.get_method_details("add(self, a, b)") + assert method_details is not None + assert isinstance(method_details, PyMethod) + assert method_details.full_signature == "add(self, a, b)" + + +def test_is_parsable(): + """It should be able to parse the code""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + code = "def is_parsable(self, code: str) -> bool: return True" + is_parsable = python_analysis.is_parsable(code) + assert is_parsable is True + + code = "def is_not_parsable(self, code: str) -> bool: return True if True else" + is_parsable = python_analysis.is_parsable(code) + assert is_parsable is False + + +def test_get_raw_ast(): + """It should return the raw AST""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + raw_ast = python_analysis.get_raw_ast(PYTHON_CODE) + assert raw_ast is not None + assert isinstance(raw_ast, Tree) + assert raw_ast.root_node is not None + + +def test_get_imports(): + """It should return all of the imports""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + all_imports = python_analysis.get_imports() + assert all_imports is not None + assert isinstance(all_imports, List) + assert len(all_imports) == 3 + for py_import in all_imports: + assert 
isinstance(py_import, PyImport) + + +def test_get_variables(): + """It should return all of the variables""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_variables() + assert except_info.type == NotImplementedError + + +def test_get_classes(): + """It should return all of the classes""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + all_classes = python_analysis.get_classes() + assert all_classes is not None + assert isinstance(all_classes, List) + assert len(all_classes) == 1 + assert isinstance(all_classes[0], PyClass) + assert all_classes[0].class_name == "Calculator" + assert len(all_classes[0].methods) == 7 + + +def test_get_classes_by_criteria(): + """It should return all of the classes that match the criteria""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_classes_by_criteria() + assert except_info.type == NotImplementedError + + +def test_get_sub_classes(): + """It should return all of the subclasses""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_sub_classes() + assert except_info.type == NotImplementedError + + +def test_get_nested_classes(): + """It should return all of the nested classes""" + 
python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_nested_classes() + assert except_info.type == NotImplementedError + + +def test_get_constructors(): + """It should return all of the constructors""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_constructors() + assert except_info.type == NotImplementedError + + +def test_get_methods_in_class(): + """It should return all of the methods in the class""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_methods_in_class() + assert except_info.type == NotImplementedError + + +def test_get_fields(): + """It should return all of the fields in the class""" + python_analysis = PythonAnalysis( + analysis_backend=AnalysisEngine.TREESITTER, eager_analysis=True, project_dir=None, source_code=PYTHON_CODE, analysis_backend_path=None, analysis_json_path=None + ) + + with pytest.raises(NotImplementedError) as except_info: + python_analysis.get_fields() + assert except_info.type == NotImplementedError diff --git a/tests/analysis/python/test_python_sitter.py b/tests/analysis/python/test_python_sitter.py new file mode 100644 index 00000000..bf1fd522 --- /dev/null +++ b/tests/analysis/python/test_python_sitter.py @@ -0,0 +1,212 @@ +################################################################################ +# Copyright IBM Corporation 2025 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +""" +Python Tests +""" +import os +from unittest.mock import patch +from typing import List +from tree_sitter import Tree + +from cldk.analysis.python.treesitter import PythonSitter +from cldk.models.python.models import PyClass, PyImport, PyMethod, PyModule + +PYTHON_CODE = """ +import os +from typing import List +from math import * + +def env(env_var: str) -> str: + return os.getenv(env_var) + +class Calculator(): + '''Calculator Class''' + + def __init__(self): + self._total = 0 + + @property + def total(self): + return self._total + + @total.setter + def total(self, value): + self._total = value + + def add(self, a, b): + self._total += a + b + return a + b + + def subtract(self, a, b): + self._total += a - b + return a - b + + def multiply(self, a, b): + self._total += (a * b) + return a * b + + def divide(self, a, b): + self._total += (a / b) + return a / b +""" + + +def test_is_parsable(): + """It should be able to parse the code""" + python_sitter = PythonSitter() + + code = "def is_parsable(self, code: str) -> bool: return True" + is_parsable = python_sitter.is_parsable(code) + assert is_parsable is True + + code = "def is_not_parsable(self, code: str) -> bool: return True if True else" + is_parsable = python_sitter.is_parsable(code) + assert is_parsable is False + + # Test when parse returns None + with 
patch("cldk.analysis.python.treesitter.python_sitter.Parser.parse") as parse_mock: + parse_mock.return_value = None + code = "def is_parsable(self, code: str) -> bool: return True" + is_parsable = python_sitter.is_parsable(code) + assert is_parsable is False + + # Test exception conditions <- Not sure why this doesn't work + # with patch("cldk.analysis.python.treesitter.python_sitter.Node.children") as recursion_mock: + # recursion_mock.side_effect = RecursionError() + # code = "def is_parsable(self, code: str) -> bool: return True" + # is_parsable = python_sitter.is_parsable(code) + # assert is_parsable is False + + +def test_get_raw_ast(): + """It should return the raw AST""" + python_sitter = PythonSitter() + + raw_ast = python_sitter.get_raw_ast(PYTHON_CODE) + assert raw_ast is not None + assert isinstance(raw_ast, Tree) + assert raw_ast.root_node is not None + + +def test_get_all_methods(): + """It should return all of the methods""" + python_sitter = PythonSitter() + + all_methods = python_sitter.get_all_methods(PYTHON_CODE) + assert all_methods is not None + assert isinstance(all_methods, List) + assert len(all_methods) == 7 + for method in all_methods: + assert isinstance(method, PyMethod) + + +def test_get_all_functions(): + """It should return all of the functions""" + python_sitter = PythonSitter() + + all_functions = python_sitter.get_all_functions(PYTHON_CODE) + assert all_functions is not None + assert isinstance(all_functions, List) + assert len(all_functions) == 1 + for method in all_functions: + assert isinstance(method, PyMethod) + + +def test_get_method_details(): + """It should return the method details""" + python_sitter = PythonSitter() + + method_details = python_sitter.get_method_details(PYTHON_CODE, "add(self, a, b)") + assert method_details is not None + assert isinstance(method_details, PyMethod) + assert method_details.full_signature == "add(self, a, b)" + + # Test when get_all_methods returns empty list + with 
patch("cldk.analysis.python.treesitter.python_sitter.PythonSitter.get_all_methods") as method_mock: + method_mock.return_value = [] + method_details = python_sitter.get_method_details(PYTHON_CODE, "add(self, a, b)") + assert method_details is None + + +def test_get_all_imports(): + """It should return all of the imports""" + python_sitter = PythonSitter() + + all_imports = python_sitter.get_all_imports(PYTHON_CODE) + assert all_imports is not None + assert isinstance(all_imports, List) + assert len(all_imports) == 3 + assert "import os" in all_imports + assert "from typing import List" in all_imports + assert "from math import *" in all_imports + + +def test_get_module_details(): + """It should return the module details""" + python_sitter = PythonSitter() + + module_details = python_sitter.get_module_details(PYTHON_CODE) + assert module_details is not None + assert isinstance(module_details, PyModule) + assert len(module_details.functions) == 1 + assert len(module_details.classes) == 1 + assert len(module_details.imports) == 3 + + +def test_get_all_import_details(): + """It should return all of the import details""" + python_sitter = PythonSitter() + + all_import_details = python_sitter.get_all_imports_details(PYTHON_CODE) + assert all_import_details is not None + assert isinstance(all_import_details, List) + assert len(all_import_details) == 3 + for import_details in all_import_details: + assert isinstance(import_details, PyImport) + + +def test_get_all_classes(): + """It should return all of the classes""" + python_sitter = PythonSitter() + + all_classes = python_sitter.get_all_classes(PYTHON_CODE) + assert all_classes is not None + assert isinstance(all_classes, List) + assert len(all_classes) == 1 + assert isinstance(all_classes[0], PyClass) + assert all_classes[0].class_name == "Calculator" + assert len(all_classes[0].methods) == 7 + + +def test_get_all_modules(tmp_path): + """It should return all of the modules""" + python_sitter = PythonSitter() + + # set up 
some temporary modules + temp_file_path = os.path.join(tmp_path, "hello.py") + with open(temp_file_path, "w", encoding="utf-8") as hello_module: + hello_module.write('print("Hello, world!")') + temp_file_path = os.path.join(tmp_path, "bye.py") + with open(temp_file_path, "w", encoding="utf-8") as bye_module: + bye_module.write('print("Goodbye, world!")') + + all_modules = python_sitter.get_all_modules(tmp_path) + assert all_modules is not None + assert isinstance(all_modules, List) + assert len(all_modules) == 2 + for module in all_modules: + assert isinstance(module, PyModule)