diff --git a/.gitattributes b/.gitattributes index 83edc67b..919e0496 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,7 +5,8 @@ * text=auto eol=lf *.java ident *.xml ident +*.puml ident *.png binary *.rpm binary *.jar binary -*.pkl binary \ No newline at end of file +*.pkl binary diff --git a/.gitignore b/.gitignore index fd429011..f4c78fb9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,11 +8,9 @@ build/ dist/ .coverage venv/ -aibolit/scripts/output html sphinx .vscode/ -models/ .ipynb_checkpoints/ .mypy_cache/ *.aux diff --git a/Makefile b/Makefile index 57ab7640..f7f82a3a 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ unittest: integrationtest: python3 -m test.integration.all + python3 -m test.integration.test_model ./test/integration/test_recommend.sh install: diff --git a/README.md b/README.md index 8631ae6d..5c5e85d5 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,20 @@ [![Maintainability](https://api.codeclimate.com/v1/badges/fd7e32d8472b4d5e8ecb/maintainability)](https://codeclimate.com/github/cqfn/aibolit/maintainability) [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/cqfn/aibolit/blob/master/LICENSE.txt) + +## How it works? + +Learn how Aibolit works in our [White Paper](https://github.com/cqfn/aibolit/releases/download/1.2.5-post.1/aibolit_wp.pdf). + + + +## How to use ? + First, you install it (you must have [Python 3.7.7](https://www.python.org/downloads/) and [Pip](https://pip.pypa.io/en/stable/installing/) installed): + + ```bash $ pip3 install aibolit ``` @@ -242,10 +253,19 @@ $ aibolit recommend --help 2. Go to `cloned_aibolit_path` 3. Run `pip install .` 4. Set env variable `export HOME_AIBOLIT=cloned_aibolit_path` (example for Linux). - 5. If you need to set up own directory where model will be saved, set up also `SAVE_MODEL_FOLDER` environment variable. + 5. Set env variable `TARGET_FOLDER` if you need to save all dataset files to another directory. + 6. You have to specify train and test dataset: set the `HOME_TRAIN_DATASET` environment variable + for train dataset and the `HOME_TEST_DATASET` environment variable for test dataset. + Usually, these files are in `scripts/target/08` directory after dataset collection (if you have not skipped it). + But you can use your own datasets. + + Please notice, that if you set `TARGET_FOLDER`, your dataset files will be in `TARGET_FOLDER/target`. + That is why it is necessary to + set HOME_TRAIN_DATASET=`TARGET_FOLDER`\target\08\08-train.csv, + HOME_TEST_DATASET =`TARGET_FOLDER`\target\08\08-test.csv + 7. If you need to set up own directory where model will be saved, set up also `SAVE_MODEL_FOLDER` environment variable. Otherwise model will be saved into `cloned_aibolit_path/aibolit/binary_files/model.pkl` - 6. If you need to set up own folder with Java files, use `--java_folder parameter`, the default value will be `scripts/target/01` of aibolit cloned repo - 7. You need to install Java 13 and Maven + 8. If you need to set up own folder with Java files, use `--java_folder parameter`, the default value will be `scripts/target/01` of aibolit cloned repo Or you can use our docker image (link will be soon here) @@ -261,6 +281,13 @@ If you need to save the dataset with all calculated metrics to a different direc $ aibolit train --java_folder=src/java --dataset_file /mnt/d/new_dir/dataset.csv ``` +You can skip dataset collection with `skip_collect_dataset` parameter. 
In this case +the model will be trained with predefined dataset (see 5 point): + +```bash +$ aibolit train --java_folder=src/java --skip_collect_dataset +``` + ## How to contribute? First, you need to install: diff --git a/aibolit/__main__.py b/aibolit/__main__.py index 2907f5d1..b5c63be1 100644 --- a/aibolit/__main__.py +++ b/aibolit/__main__.py @@ -49,6 +49,7 @@ from aibolit.config import Config from aibolit.ml_pipeline.ml_pipeline import train_process, collect_dataset from aibolit.utils.ast_builder import build_ast +from javalang.parser import JavaSyntaxError dir_path = os.path.dirname(os.path.realpath(__file__)) @@ -72,7 +73,7 @@ def predict(input_params, model, args): # deepcode ignore ExpectsIntDislikesStr: false-positive recommendation of deepcode input = [input_params[i] for i in features_order] + [input_params['M2']] th = float(args.threshold) or 1.0 - preds, importances = model.informative(input, th=th) + preds, importances = model.rank(input, th=th) return {features_order[int(x)]: int(x) for x in preds}, list(importances) @@ -259,7 +260,7 @@ def traverse(node): def calculate_patterns_and_metrics(file, args): code_lines_dict = input_params = {} # type: ignore - error_string = None + error_exc = None patterns_to_suppress = args.suppress try: config = Config.get_patterns_config() @@ -276,10 +277,10 @@ def calculate_patterns_and_metrics(file, args): continue __count_value(metric, input_params, code_lines_dict, file, is_metric=True) except Exception as ex: - error_string = str(ex) + error_exc = ex input_params = [] # type: ignore - return input_params, code_lines_dict, error_string + return input_params, code_lines_dict, error_exc def inference( @@ -331,7 +332,27 @@ def inference( return results -def run_recommend_for_file(file: str, args): +def create_results( + input_params: List[int], + code_lines_dict: Dict[Any, Any], + args: argparse.Namespace, + classes_with_patterns_ignored: List[Any], + patterns_ignored: Dict[Any, Any]): + results_list = inference(input_params, code_lines_dict, args) + new_results: List[Any] = [] + for pattern_item in results_list: + # check if the whole class is suppressed + if pattern_item['pattern_code'] not in classes_with_patterns_ignored: + # then check if patterns are ignored in fields or functions + add_pattern_if_ignored(patterns_ignored, pattern_item, new_results) + # add_pattern_if_ignored(patterns_for_fields_ignored, pattern_item, new_results) + else: + continue + + return new_results + + +def run_recommend_for_file(file: str, args): # flake8: noqa """ Calculate patterns and metrics, pass values to model and suggest pattern to change :param file: file to analyze @@ -339,47 +360,46 @@ def run_recommend_for_file(file: str, args): :return: dict with code lines, filename and pattern name """ java_file = str(Path(os.getcwd(), file)) - tree = build_ast(file) - classes_with_annonations = find_annotation_by_node_type(tree, javalang.tree.ClassDeclaration) - functions_with_annotations = find_annotation_by_node_type(tree, javalang.tree.MethodDeclaration) - fields_with_annotations = find_annotation_by_node_type(tree, javalang.tree.FieldDeclaration) - classes_with_patterns_ignored = flatten( - [pattern_code for node, pattern_code in classes_with_annonations.items()]) - patterns_ignored = defaultdict(list) - - for node, patterns_list in functions_with_annotations.items(): - start_pos, end_pos = find_start_and_end_lines(node) - for p in patterns_list: - patterns_ignored[p].append([start_pos, end_pos]) - - for node, patterns_list in 
fields_with_annotations.items(): - for p in patterns_list: - patterns_ignored[p].append([node.position.line, node.position.line]) - - input_params, code_lines_dict, error_string = calculate_patterns_and_metrics(java_file, args) - - if not input_params: - results_list = [] # type: ignore - error_string = 'Empty java file; ncss = 0' - # deepcode ignore ExpectsIntDislikesStr: False positive - elif input_params['M2'] == 0: - results_list = [] # type: ignore - error_string = 'Empty java file; ncss = 0' - else: - results_list = inference(input_params, code_lines_dict, args) - new_results: List[Any] = [] - for pattern_item in results_list: - # check if the whole class is suppressed - if pattern_item['pattern_code'] not in classes_with_patterns_ignored: - # then check if patterns are ignored in fields or functions - add_pattern_if_ignored(patterns_ignored, pattern_item, new_results) - # add_pattern_if_ignored(patterns_for_fields_ignored, pattern_item, new_results) - else: - continue - - results_list = new_results - - if error_string: + try: + tree = build_ast(file) + classes_with_annonations = find_annotation_by_node_type(tree, javalang.tree.ClassDeclaration) + functions_with_annotations = find_annotation_by_node_type(tree, javalang.tree.MethodDeclaration) + fields_with_annotations = find_annotation_by_node_type(tree, javalang.tree.FieldDeclaration) + classes_with_patterns_ignored = flatten( + [pattern_code for node, pattern_code in classes_with_annonations.items()]) + patterns_ignored = defaultdict(list) + + for node, patterns_list in functions_with_annotations.items(): + start_pos, end_pos = find_start_and_end_lines(node) + for p in patterns_list: + patterns_ignored[p].append([start_pos, end_pos]) + + for node, patterns_list in fields_with_annotations.items(): + for p in patterns_list: + patterns_ignored[p].append([node.position.line, node.position.line]) + + input_params, code_lines_dict, error_exception = calculate_patterns_and_metrics(java_file, args) + + if not input_params: + results_list = [] # type: ignore + error_exception = 'Empty java file; ncss = 0' + # deepcode ignore ExpectsIntDislikesStr: False positive + elif input_params['M2'] == 0: + results_list = [] # type: ignore + error_exception = 'Empty java file; ncss = 0' + else: + results_list = create_results( + input_params, + code_lines_dict, + args, + classes_with_patterns_ignored, + patterns_ignored) + + except Exception as e: + error_exception = e + results_list = [] + + if error_exception: ncss = 0 else: ncss = input_params.get('M4', 0) @@ -387,7 +407,7 @@ def run_recommend_for_file(file: str, args): return { 'filename': file, 'results': results_list, - 'error_string': error_string, + 'exception': error_exception, 'ncss': ncss, } @@ -411,7 +431,7 @@ def create_xml_tree(results, full_report, cmd, exit_code): files_number_tag.addprevious(etree.Comment('Files with found patterns')) files_number_tag.text = str(len( [x for x in results - if (not x.get('error_string') and x.get('results'))] + if (not x.get('exception') and x.get('results'))] )) patterns_number_tag = etree.SubElement(header_tag, 'patterns') ncss_tag = etree.SubElement(header_tag, 'ncss') @@ -433,7 +453,8 @@ def create_xml_tree(results, full_report, cmd, exit_code): output_string_tag = etree.SubElement(file, 'summary') name.text = filename results_item = result_for_file.get('results') - errors_string = result_for_file.get('error_string') + ex = result_for_file.get('exception') + errors_string = str(result_for_file.get('exception')) or type(ex).__name__ if not 
results_item and not errors_string: output_string = 'Your code is perfect in aibolit\'s opinion' output_string_tag.text = output_string @@ -486,15 +507,21 @@ def get_exit_code(results): files_analyzed = len(results) errors_number = 0 perfect_code_number = 0 + errors_strings = [] for result_for_file in results: results = result_for_file.get('results') - errors_string = result_for_file.get('error_string') - if not results and not errors_string: + ex = result_for_file.get('exception') + if not results and not ex: perfect_code_number += 1 - elif not results and errors_string: - errors_number += 1 + elif not results and ex: + if not isinstance(ex, JavaSyntaxError): + errors_strings.append(ex) + errors_number += 1 + else: + # ignore JavaSyntaxError, it is expected error + perfect_code_number += 1 - if errors_number == files_analyzed: + if len(errors_strings) == files_analyzed: # we have errors everywhere exit_code = 2 elif perfect_code_number == files_analyzed: @@ -518,17 +545,17 @@ def create_text(results, full_report, is_long=False): for result_for_file in results: filename = result_for_file.get('filename') results_item = result_for_file.get('results') - errors_string = result_for_file.get('error_string') - if not results_item and not errors_string: + ex = result_for_file.get('exception') + if not results_item and not ex: # Do nothing, patterns were not found pass - if not results_item and errors_string: + if not results_item and ex: output_string = '{}: error when calculating patterns: {}'.format( filename, - str(errors_string) + str(ex) or type(ex).__name__ ) buffer.append(output_string) - elif results_item and not errors_string: + elif results_item and not ex: # get unique patterns score patterns_scores = print_total_score_for_file(buffer, filename, importances_for_all_classes, result_for_file) patterns_number = len(patterns_scores) diff --git a/aibolit/ast_framework/__init__.py b/aibolit/ast_framework/__init__.py index f4076848..bd3b6442 100644 --- a/aibolit/ast_framework/__init__.py +++ b/aibolit/ast_framework/__init__.py @@ -1,3 +1,10 @@ from aibolit.ast_framework.ast_node_type import ASTNodeType # noqa: F401 from aibolit.ast_framework.ast_node import ASTNode # noqa: F401 from aibolit.ast_framework.ast import AST # noqa: F401 + +# register all standard computed fields from 'computed_fields_catalog' +from aibolit.ast_framework.computed_fields_catalog.standard_fields import ( + register_standard_computed_properties, +) + +register_standard_computed_properties() diff --git a/aibolit/ast_framework/ast_node.py b/aibolit/ast_framework/ast_node.py index bf1271eb..6329bd95 100644 --- a/aibolit/ast_framework/ast_node.py +++ b/aibolit/ast_framework/ast_node.py @@ -25,8 +25,13 @@ from networkx import DiGraph, dfs_preorder_nodes # type: ignore from cached_property import cached_property # type: ignore +from aibolit.ast_framework._auxiliary_data import ( + common_attributes, + attributes_by_node_type, + ASTNodeReference, +) from aibolit.ast_framework import ASTNodeType -from aibolit.ast_framework._auxiliary_data import common_attributes, attributes_by_node_type, ASTNodeReference +from aibolit.ast_framework.computed_fields_registry import computed_fields_registry class ASTNode: @@ -35,7 +40,7 @@ def __init__(self, graph: DiGraph, node_index: int): self._node_index = node_index @property - def children(self) -> Iterator['ASTNode']: + def children(self) -> Iterator["ASTNode"]: for child_index in self._graph.succ[self._node_index]: yield ASTNode(self._graph, child_index) @@ -68,17 +73,29 @@ def 
line(self) -> int: if line is not None: return line - raise RuntimeError(f"Failed to retrieve source code line information for {repr(self)} node. " - "All nodes in a path from root to it and all nodes, reachable from it, " - "does not have any source code line information either.") + raise RuntimeError( + f"Failed to retrieve source code line information for {repr(self)} node. " + "All nodes in a path from root to it and all nodes, reachable from it, " + "does not have any source code line information either." + ) def __getattr__(self, attribute_name: str): - if attribute_name not in common_attributes: - node_type = self._get_type(self._node_index) - if(attribute_name not in attributes_by_node_type[node_type]): - raise AttributeError(f'{node_type} node does not have "{attribute_name}" attribute.') + node_type = self._get_type(self._node_index) + existing_fields_names = attributes_by_node_type[node_type] + computed_fields = computed_fields_registry.get_fields(node_type) + if attribute_name not in common_attributes and \ + attribute_name not in existing_fields_names and \ + attribute_name not in computed_fields: + raise AttributeError( + "Failed to retrieve property. " + f"'{node_type}' node does not have '{attribute_name}' attribute." + ) + + if attribute_name in computed_fields: + attribute = computed_fields[attribute_name](self) + else: + attribute = self._graph.nodes[self._node_index][attribute_name] - attribute = self._graph.nodes[self._node_index][attribute_name] if isinstance(attribute, ASTNodeReference): attribute = self._create_node_from_reference(attribute) elif isinstance(attribute, list): @@ -88,32 +105,47 @@ def __getattr__(self, attribute_name: str): def __dir__(self) -> List[str]: node_type = self._get_type(self._node_index) return ASTNode._public_fixed_interface + \ - list(common_attributes) + list(attributes_by_node_type[node_type]) + list(common_attributes) + \ + list(attributes_by_node_type[node_type]) + \ + list(computed_fields_registry.get_fields(node_type).keys()) def __str__(self) -> str: - text_representation = f'node index: {self._node_index}' + text_representation = f"node index: {self._node_index}" node_type = self._get_type(self._node_index) - for attribute_name in sorted(common_attributes | attributes_by_node_type[node_type]): + for attribute_name in sorted( + common_attributes | attributes_by_node_type[node_type] + ): attribute_value = self.__getattr__(attribute_name) - attribute_representation = \ - repr(attribute_value) if isinstance(attribute_value, ASTNode) else str(attribute_value) - text_representation += f'\n{attribute_name}: {attribute_representation}' + + if isinstance(attribute_value, ASTNode): + attribute_representation = repr(attribute_value) + elif isinstance(attribute_value, str) and "\n" in attribute_value: + attribute_representation = "\n\t" + attribute_value.replace( + "\n", "\n\t" + ) + else: + attribute_representation = str(attribute_value) + + text_representation += f"\n{attribute_name}: {attribute_representation}" return text_representation def __repr__(self) -> str: - return f'' + return f"" def __eq__(self, other: object) -> bool: if not isinstance(other, ASTNode): - raise NotImplementedError(f'ASTNode support comparission only with themselves, \ - but {type(other)} was provided.') + raise NotImplementedError( + f"ASTNode support comparission only with themselves, but {type(other)} was provided." 
+ ) return self._graph == other._graph and self._node_index == other._node_index def __hash__(self): return hash(self._node_index) - def _replace_references_with_nodes(self, list_with_references: List[Any]) -> List[Any]: + def _replace_references_with_nodes( + self, list_with_references: List[Any] + ) -> List[Any]: list_with_nodes: List[Any] = [] for item in list_with_references: if isinstance(item, ASTNodeReference): @@ -123,24 +155,26 @@ def _replace_references_with_nodes(self, list_with_references: List[Any]) -> Lis elif isinstance(item, (int, str)) or item is None: list_with_nodes.append(item) else: - raise RuntimeError('Failed parsing attribute.\n' - f'An {item} with {type(item)} was found.\n' - 'Expected: int, str, ASNodeReference of list of them.') + raise RuntimeError( + "Failed parsing attribute.\n" + f"An {item} with {type(item)} was found.\n" + "Expected: int, str, ASNodeReference of list of them." + ) return list_with_nodes - def _create_node_from_reference(self, reference: ASTNodeReference) -> 'ASTNode': + def _create_node_from_reference(self, reference: ASTNodeReference) -> "ASTNode": return ASTNode(self._graph, reference.node_index) def _get_type(self, node_index: int) -> ASTNodeType: - return self._graph.nodes[node_index]['node_type'] + return self._graph.nodes[node_index]["node_type"] def _get_line(self, node_index: int) -> Optional[int]: - return self._graph.nodes[node_index]['line'] + return self._graph.nodes[node_index]["line"] def _get_parent(self, node_index: int) -> Optional[int]: # there is maximum one parent in a tree return next(self._graph.predecessors(node_index), None) # names of methods and properties, which is not generated dynamically - _public_fixed_interface = ['children', 'node_index', 'subtree', 'line'] + _public_fixed_interface = ["children", "node_index", "line"] diff --git a/aibolit/patterns/this_finder/__init__.py b/aibolit/ast_framework/computed_fields_catalog/__init__.py similarity index 100% rename from aibolit/patterns/this_finder/__init__.py rename to aibolit/ast_framework/computed_fields_catalog/__init__.py diff --git a/aibolit/ast_framework/computed_fields_catalog/chained_fields.py b/aibolit/ast_framework/computed_fields_catalog/chained_fields.py new file mode 100644 index 00000000..13b1888f --- /dev/null +++ b/aibolit/ast_framework/computed_fields_catalog/chained_fields.py @@ -0,0 +1,65 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from typing import Union, Any, Callable +from itertools import chain + +from aibolit.ast_framework import ASTNode + + +def chain_field_getter_factory(*steps: Union[str, int]) -> Callable[[ASTNode], Any]: + """ + A chained field is a field of some other field and so on. + For example name of class field is retrieved like `field_decl.declarators[0].name` using javalang fields. + To automate this chain fields can be used. + Chain field is specified by sequence of strings (field names) and integers (list indexes). + If a field is list of ASTNodes and next step in chain is string + then we get that field from every node in list. + If we get list of lists, it gets flatten. + List with single items is unwrapped. + """ + + def get_chain_field(node: ASTNode) -> Any: + field = node + for step in steps: + if isinstance(field, list) and \ + isinstance(step, str) and \ + all(isinstance(item, ASTNode) for item in field): + # get attribute from all elements from a list + field = [getattr(item, step) for item in field] + + if all(isinstance(item, list) for item in field): # flattening list + field = list(chain.from_iterable(field)) + + elif isinstance(field, list) and isinstance(step, int): + field = field[step] + elif isinstance(field, ASTNode) and isinstance(step, str): + field = getattr(field, step) + else: + raise RuntimeError(f"Failed to apply step {step} to field {field}.") + + if isinstance(field, list) and len(field) == 1: + field = field[0] + + return field + + return get_chain_field diff --git a/test/metrics/fanout/test_all_types.py b/aibolit/ast_framework/computed_fields_catalog/nodes_filter.py similarity index 53% rename from test/metrics/fanout/test_all_types.py rename to aibolit/ast_framework/computed_fields_catalog/nodes_filter.py index 4233c4d8..5b212038 100644 --- a/test/metrics/fanout/test_all_types.py +++ b/aibolit/ast_framework/computed_fields_catalog/nodes_filter.py @@ -20,32 +20,31 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import os -from unittest import TestCase -from aibolit.metrics.fanout.FanOut import FanOut -from pathlib import Path - - -class TestFanOut(TestCase): - dir_path = Path(os.path.realpath(__file__)).parent - fanout = FanOut() - - def test_1(self): - lines = self.fanout.value(Path(self.dir_path, '1.java')) - self.assertEqual(lines, 3) - - def test_2(self): - lines = self.fanout.value(Path(self.dir_path, '2.java')) - self.assertEqual(lines, 2) - - def test_3(self): - lines = self.fanout.value(Path(self.dir_path, '3.java')) - self.assertEqual(lines, 2) - - def test_4(self): - lines = self.fanout.value(Path(self.dir_path, '4.java')) - self.assertEqual(lines, 1) - - def test_5(self): - lines = self.fanout.value(Path(self.dir_path, '5.java')) - self.assertEqual(lines, 1) +from typing import Callable, Iterator + +from aibolit.ast_framework import ASTNode, ASTNodeType + + +def nodes_filter_factory( + base_field_name: str, *node_types: ASTNodeType +) -> Callable[[ASTNode], Iterator[ASTNode]]: + """ + Create filter, which takes 'body_field_name' field of incoming node, + checks if it list of ASTNode, and return it filtered by node_type. + """ + + def filter(base_node: ASTNode) -> Iterator[ASTNode]: + base_field = getattr(base_node, base_field_name) + if isinstance(base_field, list) and all( + isinstance(item, ASTNode) for item in base_field + ): + for node in base_field: + if node.node_type in node_types: + yield node + else: + raise RuntimeError( + f"Failed computing ASTNode field based on {base_field_name} field. 
" + f"Expected list, but got {base_field} of type {type(base_field)}." + ) + + return filter diff --git a/aibolit/ast_framework/computed_fields_catalog/standard_fields.py b/aibolit/ast_framework/computed_fields_catalog/standard_fields.py new file mode 100644 index 00000000..fdb530f4 --- /dev/null +++ b/aibolit/ast_framework/computed_fields_catalog/standard_fields.py @@ -0,0 +1,83 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from aibolit.ast_framework.computed_fields_registry import computed_fields_registry +from aibolit.ast_framework import ASTNodeType + +from .nodes_filter import nodes_filter_factory +from .chained_fields import chain_field_getter_factory + + +def register_standard_computed_properties() -> None: + _register_standard_nodes_filters() + _register_standard_chain_fields() + + +def _register_standard_nodes_filters() -> None: + computed_fields_registry.register( + nodes_filter_factory("body", ASTNodeType.CONSTRUCTOR_DECLARATION), + "constructors", + ASTNodeType.CLASS_DECLARATION, + ASTNodeType.INTERFACE_DECLARATION, + ASTNodeType.ANNOTATION_DECLARATION, + ) + + computed_fields_registry.register( + nodes_filter_factory( + "body", ASTNodeType.CONSTRUCTOR_DECLARATION, ASTNodeType.METHOD_DECLARATION + ), + "methods", + ASTNodeType.CLASS_DECLARATION, + ASTNodeType.INTERFACE_DECLARATION, + ASTNodeType.ANNOTATION_DECLARATION, + ) + + computed_fields_registry.register( + nodes_filter_factory("body", ASTNodeType.FIELD_DECLARATION), + "fields", + ASTNodeType.CLASS_DECLARATION, + ASTNodeType.INTERFACE_DECLARATION, + ASTNodeType.ANNOTATION_DECLARATION, + ) + + computed_fields_registry.register( + nodes_filter_factory("declarations", ASTNodeType.METHOD_DECLARATION), + "methods", + ASTNodeType.ENUM_DECLARATION, + ) + + computed_fields_registry.register( + nodes_filter_factory("declarations", ASTNodeType.FIELD_DECLARATION), + "fields", + ASTNodeType.ENUM_DECLARATION, + ) + + +def _register_standard_chain_fields() -> None: + computed_fields_registry.register( + chain_field_getter_factory("declarators", "name"), + "name", + ASTNodeType.CONSTANT_DECLARATION, + ASTNodeType.FIELD_DECLARATION, + ASTNodeType.LOCAL_VARIABLE_DECLARATION, + ASTNodeType.VARIABLE_DECLARATION, + ) diff --git a/aibolit/ast_framework/computed_fields_registry.py b/aibolit/ast_framework/computed_fields_registry.py new file mode 100644 index 00000000..028aae14 --- /dev/null +++ b/aibolit/ast_framework/computed_fields_registry.py @@ -0,0 +1,69 @@ +# The 
MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from collections import defaultdict +import sys +from typing import Dict, Callable, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from aibolit.ast_framework import ASTNode, ASTNodeType # noqa: F401 + + +class _ComputedFieldsRegistry: + def __init__(self) -> None: + RegistryType = Dict["ASTNodeType", Dict[str, Callable[["ASTNode"], Any]]] + self._registry: RegistryType = defaultdict(dict) + + def register( + self, + compute_field: Callable[["ASTNode"], Any], + name: str, + *node_types: "ASTNodeType", + ) -> None: + for node_type in node_types: + computed_fields = self._registry[node_type] + if name in computed_fields and \ + not _ComputedFieldsRegistry._is_in_interactive_shell(): + raise RuntimeError( + f"Registry already has computed field " + f"named '{name}' for node type {node_type}." 
+ ) + + computed_fields[name] = compute_field + + def get_fields( + self, node_type: "ASTNodeType" + ) -> Dict[str, Callable[["ASTNode"], Any]]: + return self._registry[node_type] + + def clear(self) -> None: + self._registry = defaultdict(dict) + + @staticmethod + def _is_in_interactive_shell() -> bool: + """ + Taken from comments to this answer https://stackoverflow.com/a/6879085/2129920 + """ + return bool(getattr(sys, "ps1", sys.flags.interactive)) + + +computed_fields_registry = _ComputedFieldsRegistry() diff --git a/aibolit/binary_files/model.pkl b/aibolit/binary_files/model.pkl index e0f6101b..dc7a0ff0 100644 Binary files a/aibolit/binary_files/model.pkl and b/aibolit/binary_files/model.pkl differ diff --git a/aibolit/config.py b/aibolit/config.py index 9d65bcad..8fa1a763 100644 --- a/aibolit/config.py +++ b/aibolit/config.py @@ -36,7 +36,7 @@ from aibolit.patterns.send_null.send_null import SendNull as P31 from aibolit.patterns.string_concat.string_concat import StringConcatFinder as P17 from aibolit.patterns.supermethod.supermethod import SuperMethod as P18 -from aibolit.patterns.this_finder.this_finder import ThisFinder as P19 +from aibolit.patterns.hybrid_constructor.hybrid_constructor import HybridConstructor as P19 from aibolit.patterns.var_decl_diff.var_decl_diff import VarDeclarationDistance as P20 from aibolit.patterns.var_middle.var_middle import VarMiddle as P21 from aibolit.patterns.var_siblings.var_siblings import VarSiblings as P27 @@ -73,6 +73,14 @@ def dataset_file(): dataset_path = Path(Config().home_aibolit_folder(), 'scripts', 'target', 'dataset.csv') return os.environ.get('HOME_DATASET_CSV') or dataset_path + @staticmethod + def train_csv(): + return os.environ.get('HOME_TRAIN_DATASET') + + @staticmethod + def test_csv(): + return os.environ.get('HOME_TEST_DATASET') + @staticmethod def get_patterns_config(): return { diff --git a/aibolit/metrics/RFC/rfc.py b/aibolit/metrics/RFC/rfc.py index f2746fdb..7259a0f8 100644 --- a/aibolit/metrics/RFC/rfc.py +++ b/aibolit/metrics/RFC/rfc.py @@ -1,71 +1,92 @@ -from aibolit.ast_framework import ASTNodeType -from aibolit.ast_framework.java_package import JavaPackage -from typing import Set, Any +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from typing import Set, NamedTuple + +from aibolit.ast_framework import AST, ASTNode, ASTNodeType + + +class _MethodInvocationParams(NamedTuple): + isLocal: bool + name: str class RFC: - ''' - The Response For a Class is an object-oriented - metric that shows the interaction of the class' - methods with other methods. - ''' - def __init__(self): - self.class_methods = {} + """ + The Response For a Class (RFC) is an object-oriented metric + that shows the interaction of the class methods with other methods. + This implementation accept arbitrary AST and + return sum of RFC for all class declaration in it. + To calculate RFC of a class we count number of public methods and + number of distinct methods invocation in those methods. + """ + + def value(self, ast: AST) -> int: + rfc = 0 + for class_declaration in ast.get_proxy_nodes(ASTNodeType.CLASS_DECLARATION): + rfc += self._calculate_class_RFC(ast.get_subtree(class_declaration)) + + return rfc - def exclude_inhereted_methods(self) -> Set[Any]: - temp = self.class_methods.copy() - for each_method in self.class_methods.keys(): - counter = 0 - for i in self.class_methods[each_method]: - if i in self.class_methods: - counter += 1 - else: - counter -= 1 - if counter > 0: - temp.pop(each_method) - final_rfc = set() - for declared_method in temp.keys(): - if len(declared_method) != 0: - final_rfc.add(declared_method) + def _calculate_class_RFC(self, java_class: AST) -> int: + class_declaration = java_class.get_root() + assert class_declaration.node_type == ASTNodeType.CLASS_DECLARATION - for invoked_methods in temp.values(): - for each_method in invoked_methods: - final_rfc.add(each_method) - return final_rfc + rfc = 0 + invoked_methods: Set[_MethodInvocationParams] = set() + local_methods_names: Set[str] = set() + for class_item in class_declaration.body: + if class_item.node_type == ASTNodeType.METHOD_DECLARATION: + local_methods_names.add(class_item.name) + if "public" in class_item.modifiers: + rfc += 1 + invoked_methods |= self._get_all_method_invocation_params( + java_class.get_subtree(class_item) + ) - def get_invoked(self, tree) -> Set[Any]: - inv_names = set() - inv_methods = tree.get_nodes(ASTNodeType.METHOD_INVOCATION) - for inv_method in inv_methods: - name_of_invoked_class = tree.get_method_invocation_params(inv_method) - current_name = name_of_invoked_class.method_name - inv_names.add(current_name) - return inv_names + # filter out inherited methods + # consider local methods with name not found + # among methods names of current class as inherited + invoked_methods = { + invoked_method + for invoked_method in invoked_methods + if not invoked_method.isLocal or invoked_method.name in local_methods_names + } - def value(self, filename: str) -> int: # noqa: C901 - p = JavaPackage(filename) - for class_name in p.java_classes: - tree = p.java_classes[class_name] - declareted_methods = tree.get_subtrees(ASTNodeType.METHOD_DECLARATION) - for class_method in declareted_methods: - # to form a set of all methods in the class - names = list(class_method.children_with_type(class_method.root, ASTNodeType.STRING)) - for each_string in names: - method_name = tree.get_attr(each_string, 'string') - # we need to check the name because even comments are counted as the childs with string type - # need to get rid of them - if not method_name.startswith('/'): - self.class_methods[method_name] = set() - break + rfc += len(invoked_methods) + return rfc - # to count invoked methods - tree = p.java_classes[class_name] - declareted_methods = 
tree.get_subtrees(ASTNodeType.METHOD_DECLARATION) - for meth_name, class_method in zip(self.class_methods.keys(), declareted_methods): - invoked_names = self.get_invoked(class_method) - for i in invoked_names: - self.class_methods[meth_name].add(i) + def _get_all_method_invocation_params( + self, ast: AST + ) -> Set[_MethodInvocationParams]: + return { + self._create_method_invocation_params(method_invocation) + for method_invocation in ast.get_proxy_nodes(ASTNodeType.METHOD_INVOCATION) + } - final_rfc = len(self.exclude_inhereted_methods()) - self.class_methods.clear() - return final_rfc + def _create_method_invocation_params( + self, method_invocation: ASTNode + ) -> _MethodInvocationParams: + assert method_invocation.node_type == ASTNodeType.METHOD_INVOCATION + return _MethodInvocationParams( + isLocal=len(method_invocation.qualifier) == 0, name=method_invocation.member + ) diff --git a/aibolit/metrics/fanout/FanOut.py b/aibolit/metrics/fanout/FanOut.py index 5eb9d320..9e79ed28 100644 --- a/aibolit/metrics/fanout/FanOut.py +++ b/aibolit/metrics/fanout/FanOut.py @@ -1,66 +1,112 @@ -from aibolit.utils.ast_builder import build_ast -from aibolit.ast_framework import AST, ASTNodeType -from itertools import islice -from aibolit.ast_framework.java_package import JavaPackage +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from typing import Set + +from aibolit.ast_framework import AST, ASTNode, ASTNodeType class FanOut: - ''' + """ Fan Out metric is defined as the number of other classes referenced by a class. 
- ''' - def __init__(self): - pass + """ + + def value(self, ast: AST) -> int: + fan_out = 0 + for class_declaration in ast.get_subtrees(ASTNodeType.CLASS_DECLARATION): + fan_out += self._calculate_class_fan_out(class_declaration) + + return fan_out + + def _calculate_class_fan_out(self, java_class: AST) -> int: + class_declaration = java_class.get_root() + assert class_declaration.node_type == ASTNodeType.CLASS_DECLARATION - def value(self, filename: str) -> int: # noqa: C901 + used_classes_names: Set[str] = set() - # exception are used from https://checkstyle.sourceforge.io/config_metrics.html#ClassFanOutComplexity - considered_classes = {'ArrayIndexOutOfBoundsException': 0, 'ArrayList': 0, 'Boolean': 0, 'Byte': 0, - 'Character': 0, 'Class': 0, 'Deprecated': 0, 'Deque': 0, 'Double': 0, - 'Exception': 0, 'Float': 0, 'FunctionalInterface': 0, 'HashMap': 0, - 'HashSet': 0, 'IllegalArgumentException': 0, 'IllegalStateException': 0, - 'IndexOutOfBoundsException': 0, 'Integer': 0, 'LinkedList': 0, 'List': 0, - 'Long': 0, 'Map': 0, 'NullPointerException': 0, 'Object': 0, 'Override': 0, - 'Queue': 0, 'RuntimeException': 0, 'SafeVarargs': 0, 'SecurityException': 0, - 'Set': 0, 'Short': 0, 'SortedMap': 0, 'SortedSet': 0, 'String': 0, 'StringBuffer': 0, - 'StringBuilder': 0, 'SuppressWarnings': 0, 'Throwable': 0, 'short': 0, 'void': 0, - 'TreeMap': 0, 'TreeSet': 0, 'UnsupportedOperationException': 0, 'Void': 0, - 'System.out': 0, 'boolean': 0, 'byte': 0, 'char': 0, 'double': 0, 'float': 0, - 'int': 0, 'long': 0, - } - fan_outs = 0 + for type_reference in java_class.get_proxy_nodes(ASTNodeType.REFERENCE_TYPE): + used_class_name = self._get_class_name_from_type_reference(type_reference) + if used_class_name not in FanOut._excluded_class_names: + used_classes_names.add(used_class_name) - # check imported classes - tree = AST.build_from_javalang(build_ast(filename)) - for each_import in (tree.children_with_type(tree.root, ASTNodeType.IMPORT)): - name_node, = islice(tree.children_with_type(each_import, ASTNodeType.STRING), 1) - new_class = tree.get_attr(name_node, 'string').split('.')[-1] - if considered_classes.get(new_class) is None: - fan_outs += 1 - considered_classes[new_class] = 0 + # remove name of the class + used_classes_names -= {class_declaration.name} + return len(used_classes_names) - p = JavaPackage(filename) - for class_name in p.java_classes: - tree = p.java_classes[class_name] - for var_node in tree.get_nodes(ASTNodeType.VARIABLE_DECLARATOR): - var_child = list(tree.children_with_type(var_node, ASTNodeType.STRING)) - new_class_name = tree.get_attr(var_child[0], 'string') + def _get_class_name_from_type_reference(self, type_reference: ASTNode) -> str: + assert type_reference.node_type == ASTNodeType.REFERENCE_TYPE - for class_creator_node in tree.children_with_type(var_node, ASTNodeType.CLASS_CREATOR): - for go_to_name in tree.children_with_type(class_creator_node, ASTNodeType.REFERENCE_TYPE): - classC_child = list(tree.children_with_type(go_to_name, ASTNodeType.STRING)) - used_class_name = tree.get_attr(classC_child[0], 'string') - if considered_classes.get(used_class_name) is None: - considered_classes[used_class_name] = 0 - fan_outs += 1 - if considered_classes.get(new_class_name) is None: - considered_classes[new_class_name] = 0 + # type_reference 'name' field may not have a name of class in case this class + # is referenced from packages, like 'package1.package2.class'. + # To get class name we need to iterate over subtypes first and in the end get class name. 
+ while isinstance(type_reference.sub_type, ASTNode): + type_reference = type_reference.sub_type - # check classes of invokated methods - for i in tree.get_nodes(ASTNodeType.STATEMENT_EXPRESSION): - for invoked_method_child in tree.children_with_type(i, ASTNodeType.METHOD_INVOCATION): - name_of_invoked_class = tree.get_method_invocation_params(invoked_method_child) - if considered_classes.get(name_of_invoked_class.object_name) is None: - considered_classes[name_of_invoked_class.object_name] = 0 - fan_outs += 1 + return type_reference.name - return fan_outs + # exception are used from https://checkstyle.sourceforge.io/config_metrics.html#ClassFanOutComplexity + # basic types ('int', 'long', etc.) are not used, because ASTNodeType.REFERENCE_TYPE match only class types + _excluded_class_names = { + "ArrayIndexOutOfBoundsException", + "ArrayList", + "Boolean", + "Byte", + "Character", + "Class", + "Deprecated", + "Deque", + "Double", + "Exception", + "Float", + "FunctionalInterface", + "HashMap", + "HashSet", + "IllegalArgumentException", + "IllegalStateException", + "IndexOutOfBoundsException", + "Integer", + "LinkedList", + "List", + "Long", + "Map", + "NullPointerException", + "Object", + "Override", + "Queue", + "RuntimeException", + "SafeVarargs", + "SecurityException", + "Set", + "Short", + "SortedMap", + "SortedSet", + "String", + "StringBuffer", + "StringBuilder", + "SuppressWarnings", + "Throwable", + "TreeMap", + "TreeSet", + "UnsupportedOperationException", + "Void", + "System.out", + } diff --git a/aibolit/metrics/ncss/ncss.py b/aibolit/metrics/ncss/ncss.py index ea8a335e..f0d43115 100644 --- a/aibolit/metrics/ncss/ncss.py +++ b/aibolit/metrics/ncss/ncss.py @@ -24,42 +24,44 @@ from aibolit.utils.ast_builder import build_ast -class NCSSMetric(): - ''' +class NCSSMetric: + """ NCSS metric counts non-commenting source statements. - It counts keywords, declarations and statement expressions. - Following description was used as a reference: - https://pmd.github.io/latest/pmd_java_metrics_index.html#non-commenting-source-statements-ncss - ''' + It counts: + - keywords from _keyword_node_types + - declarations from _declarations_node_types + - local variable declarations and statement expressions + """ def value(self, filename: str) -> int: metric = 0 ast = AST.build_from_javalang(build_ast(filename)) - for node in ast.get_proxy_nodes(*NCSSMetric._keyword_node_types, - *NCSSMetric._declarations_node_types, - *NCSSMetric._misc_node_types): + for node in ast.get_proxy_nodes( + *NCSSMetric._keyword_node_types, + *NCSSMetric._declarations_node_types, + *NCSSMetric._misc_node_types + ): metric += 1 + if node.node_type == ASTNodeType.IF_STATEMENT and self._has_pure_else_statements(node): metric += 1 - - for try_statement in ast.get_proxy_nodes(ASTNodeType.TRY_STATEMENT): - if self._has_finally_block(try_statement): + elif node.node_type == ASTNodeType.TRY_STATEMENT and self._has_finally_block(node): metric += 1 return metric def _has_pure_else_statements(self, if_statement: ASTNode) -> bool: - ''' + """ Checks is there else branch. If else branch appeared to be "else if" construction (not pure "else"), returns False. 
- ''' - assert(if_statement.node_type == ASTNodeType.IF_STATEMENT) + """ + assert if_statement.node_type == ASTNodeType.IF_STATEMENT return if_statement.else_statement is not None and \ if_statement.else_statement.node_type != ASTNodeType.IF_STATEMENT def _has_finally_block(self, try_statement: ASTNode) -> bool: - assert(try_statement.node_type == ASTNodeType.TRY_STATEMENT) + assert try_statement.node_type == ASTNodeType.TRY_STATEMENT return try_statement.finally_block is not None # Two keywords "else" and "finally" are not represented by any nodes @@ -78,6 +80,7 @@ def _has_finally_block(self, try_statement: ASTNode) -> bool: ASTNodeType.SWITCH_STATEMENT, ASTNodeType.SYNCHRONIZED_STATEMENT, ASTNodeType.THROW_STATEMENT, + ASTNodeType.TRY_STATEMENT, ASTNodeType.WHILE_STATEMENT, } diff --git a/aibolit/ml_pipeline/ml_pipeline.py b/aibolit/ml_pipeline/ml_pipeline.py index ffa32d69..2427ff81 100644 --- a/aibolit/ml_pipeline/ml_pipeline.py +++ b/aibolit/ml_pipeline/ml_pipeline.py @@ -2,11 +2,10 @@ import shutil import subprocess from pathlib import Path -from sklearn.model_selection import train_test_split # type: ignore import pickle -from random import randint -from aibolit.model.model import Dataset, TwoFoldRankingModel # type: ignore +from aibolit.model.model import PatternRankingModel, scale_dataset # type: ignore from aibolit.config import Config +import pandas as pd # type: ignore def collect_dataset(args): @@ -53,29 +52,23 @@ def run_cmd(metrics_cmd, cur_work_dir): print('Filtering java files...') filter_cmd = ['make', 'filter'] - metrics_cmd = ['make', 'metrics'] merge_cmd = ['make', 'merge'] - build_halstead_cmd = ['make', 'build_halstead'] - make_hl_cmd = ['make', 'hl'] + split_cmd = ['make', 'split'] if java_folder is not None: filter_cmd.append(f'dir={java_folder}') - metrics_cmd.append(f'dir={java_folder}') if max_classes is not None: filter_cmd.append(f'max_classes={max_classes}') run_cmd(filter_cmd, cur_work_dir) - print('Download PMD and compute metrics...') - run_cmd(metrics_cmd, cur_work_dir) make_patterns(args, cur_work_dir) - print('Building halstead.jar...') - run_cmd(build_halstead_cmd, cur_work_dir) - print('Calculating halstead metrics...') - run_cmd(make_hl_cmd, cur_work_dir) - print('Merge results and create dataset...') + print('Merge results...') run_cmd(merge_cmd, cur_work_dir) + print('Preprocess dataset, create train and test...') + run_cmd(split_cmd, cur_work_dir) + def train_process(): """ @@ -92,19 +85,23 @@ def train_process(): + ['halstead volume'] columns_features = only_metrics + only_patterns features_number = len(columns_features) - print("Number of features: ", features_number) + print("General number of features in config: ", features_number) - dataset = Dataset(only_patterns) - dataset.preprocess_file() + train_dataset = pd.read_csv(Config.train_csv(), index_col=None) + model = PatternRankingModel() + # At the moment we use use patterns as features, + # but in future features can be also some metrics. + # We should differ them for any purpose (scaling, etc.) 
features_conf = { - "features_order": dataset.feature_order, + "features_order": only_patterns, "patterns_only": only_patterns } - - X_train, X_test, y_train, y_test = train_test_split(dataset.input, dataset.target, test_size=0.3) - model = TwoFoldRankingModel() - model.fit(X_train, y_train) model.features_conf = features_conf + print('Scaling features...') + scaled_dataset = scale_dataset(train_dataset, model.features_conf) + dataset = scaled_dataset[only_patterns] + print('Training model...') + model.fit_regressor(dataset, scaled_dataset['M4']) save_model_file = Path(Config.folder_to_save_model_data(), 'model.pkl') print('Saving model to loaded model from file {}:'.format(save_model_file)) @@ -113,13 +110,18 @@ def train_process(): load_model_file = Path(Config.folder_to_save_model_data(), 'model.pkl') print('Test loaded model from file {}:'.format(load_model_file)) + test_dataset = pd.read_csv(Config.test_csv(), index_col=None) with open(load_model_file, 'rb') as fid: model_new = pickle.load(fid) + scaled_test_dataset = scale_dataset(test_dataset, model_new.features_conf).sample(n=10, random_state=17) print('Model has been loaded successfully') - for x in X_test: - snippet = [i for i in x] + [randint(1, 200)] - preds, importances = model_new.informative(snippet) + # add ncss, ncss is needed in informative as a last column + X_test = scaled_test_dataset[only_patterns + ['M2']] + + for _, row in X_test.iterrows(): + preds, importances = model_new.rank(row.values) print(preds) path_with_logs = Path(os.getcwd(), 'catboost_info') print('Removing path with catboost logs {}'.format(path_with_logs)) - shutil.rmtree(path_with_logs) + if path_with_logs.exists(): + shutil.rmtree(path_with_logs) diff --git a/aibolit/model/model.py b/aibolit/model/model.py index d6d335aa..17e581d4 100644 --- a/aibolit/model/model.py +++ b/aibolit/model/model.py @@ -1,80 +1,86 @@ from decimal import localcontext, ROUND_DOWN, Decimal -from typing import List +from typing import Dict, Any, Tuple import numpy as np import pandas as pd from catboost import CatBoost from sklearn.base import BaseEstimator -from sklearn.preprocessing import StandardScaler from aibolit.config import Config -class Dataset: - - def __init__(self, only_patterns: List[str]): - self.input = None - self.target = None - self.do_rename_columns = False - self.only_patterns = only_patterns - - def preprocess_file( - self, - scale_ncss=True, - scale=False, - **kwargs): - - print('reading dataset from {}'.format(Config.dataset_file())) - df = pd.read_csv(Config.dataset_file()) - df = df[~df["filename"].str.lower().str.contains("test")] - config = Config.get_patterns_config() - if self.do_rename_columns: - p_codes = \ - [x['code'] for x in config['patterns']] \ - + ['lines' + x['code'] for x in config['patterns']] - m_codes = [x['code'] for x in config['metrics']] - keys = p_codes + m_codes - vals = \ - [x['name'] for x in config['patterns']] \ - + ['lines' + x['name'] for x in config['patterns']] \ - + [x['name'] for x in config['metrics']] - - replace_dict = dict(zip(keys, vals)) - df = df.rename(replace_dict) - df.columns = vals - print('Columns renamed:' + df.head()) - - df = df.dropna().drop_duplicates(subset=df.columns.difference(['filename'])) - df = df[(df.ncss > 20) & (df.ncss < 100) & (df.npath_method_avg < 100000.00)].copy().reset_index() - - df.drop('filename', axis=1, inplace=True) - df.drop('index', axis=1, inplace=True) - self.target = np.array(df[['M4']].values[:, 0], dtype=np.float64) - if scale_ncss: - new = pd.DataFrame( - 
df[self.only_patterns].values / df['M2'].values.reshape((-1, 1)), - columns=self.only_patterns - ) - self.target /= df['M2'].values.reshape(-1) - else: - new = df[self.only_patterns].copy() - if scale: - self.input = pd.DataFrame(StandardScaler().fit_transform(new.values), columns=new.columns, - index=new.index).values - else: - self.input = new.values +def get_minimum( + c1: np.array, + c2: np.array, + c3: np.array) -> Tuple[np.array, np.array]: + """ + Args: + c1, c2, c3: np.array with shape (number of snippets, ). + Returns: + c: np.array with shape (number of snippets, ) - + elemental minimum of 3 arrays. + number: np.array with shape (number of snippets, ) of + arrays' numbers with minimum elements. . + """ + + c = np.vstack((c1, c2, c3)) + + return np.min(c, 0), np.argmin(c, 0) + + +def generate_fake_dataset() -> pd.DataFrame: + config = Config.get_patterns_config() + patterns = [x['code'] for x in config['patterns']] + metrics = [x['code'] for x in config['metrics']] + + train_df = pd.DataFrame(columns=patterns) + min_rows_for_train = 10 + for x in range(min_rows_for_train): + p = {p: (x + i) for i, p in enumerate(patterns)} + m = {p: (x + i) for i, p in enumerate(metrics)} + row = {**p, **m} + train_df = train_df.append(row, ignore_index=True) + + train_df = train_df.astype(float) + return train_df + + +def scale_dataset( + df: pd.DataFrame, + features_conf: Dict[Any, Any], + scale_ncss=True) -> pd.DataFrame: + config = Config.get_patterns_config() + patterns_codes_set = set([x['code'] for x in config['patterns']]) + metrics_codes_set = [x['code'] for x in config['metrics']] + exclude_features = set(config['patterns_exclude']).union(set(config['metrics_exclude'])) + used_codes = set(features_conf['features_order']) + used_codes.add('M4') + not_scaled_codes = set(patterns_codes_set).union(set(metrics_codes_set)).difference(used_codes).difference( + exclude_features) + features_not_in_config = set(df.columns).difference(not_scaled_codes).difference(used_codes) + not_scaled_codes = sorted(not_scaled_codes.union(features_not_in_config)) + codes_to_scale = sorted(used_codes) + if scale_ncss: + scaled_df = pd.DataFrame( + df[codes_to_scale].values / df['M2'].values.reshape((-1, 1)), + columns=codes_to_scale + ) + not_scaled_df = df[not_scaled_codes] + input = pd.concat([scaled_df, not_scaled_df], axis=1) + else: + input = df - self.feature_order = list(new.columns) + return input -class TwoFoldRankingModel(BaseEstimator): +class PatternRankingModel(BaseEstimator): def __init__(self): self.do_rename_columns = False self.model = None self.features_conf = None - def fit(self, X, y, display=False): + def fit_regressor(self, X, y, display=False): """ Args: X: np.array with shape (number of snippets, number of patterns) or @@ -153,22 +159,7 @@ def calculate_score(self, X, quantity_func='log', th=1.0, feature_importances=No return (np.array(ranked), pairs[:, 0].T.tolist()[::-1]) - def get_minimum(self, c1, c2, c3): - """ - Args: - c1, c2, c3: np.array with shape (number of snippets, ). - Returns: - c: np.array with shape (number of snippets, ) - - elemental minimum of 3 arrays. - number: np.array with shape (number of snippets, ) of - arrays' numbers with minimum elements. . 
- """ - - c = np.vstack((c1, c2, c3)) - - return np.min(c, 0), np.argmin(c, 0) - - def informative(self, snippet, scale=True, th=1.0): + def rank(self, snippet, scale=True, th=1.0): """ Args: snippet: np.array with shape (number of snippets, number of patterns + 1), diff --git a/aibolit/model/stats.py b/aibolit/model/stats.py new file mode 100644 index 00000000..877cdbb4 --- /dev/null +++ b/aibolit/model/stats.py @@ -0,0 +1,204 @@ +import pickle +from typing import Dict, Any, Tuple + +import numpy as np +import pandas as pd +from aibolit.config import Config +from aibolit.model.model import PatternRankingModel, scale_dataset, get_minimum # noqa: F401 type: ignore + + +class Stats(object): + + @staticmethod + def aibolit_stat(test_csv: pd.DataFrame, model=None) -> pd.DataFrame: + if not model: + load_model_file = Config.folder_model_data() + print('Loading model from file {}:'.format(load_model_file)) + with open(load_model_file, 'rb') as fid: + model = pickle.load(fid) + print('Model has been loaded successfully') + + scaled_dataset = scale_dataset(test_csv, model.features_conf) + cleaned_dataset = scaled_dataset[model.features_conf['features_order'] + ['M2']] + ranked, _, acts_complexity, acts = Stats.check_impact( + cleaned_dataset.values, + model + ) + + m, p = Stats.count_acts(acts, ranked) + return Stats.get_table(model.features_conf['features_order'], m, p, acts_complexity) + + @staticmethod + def count_acts( + acts: np.array, + ranked: np.array) -> Tuple[np.array, np.array]: + patterns_numbers = ranked[:, 0] + # number of times when pattern was on first place, + # if we decrease pattern by 1/ncss + m = np.zeros(ranked.shape[1]) + # number of times when pattern was on first place, + # if we increase pattern by 1/ncss + p = np.zeros(ranked.shape[1]) + for i in range(len(patterns_numbers)): + if acts[i] == 1: + m[patterns_numbers[i]] += 1 + elif acts[i] == 2: + p[patterns_numbers[i]] += 1 + return m, p + + @staticmethod + def get_patterns_name() -> Dict[Any, Any]: + only_patterns = [] + patterns_code = [] + config = Config.get_patterns_config() + for x in config['patterns']: + if x['code'] not in config['patterns_exclude']: + only_patterns.append(x['name']) + patterns_code.append(x['code']) + features_number = len(only_patterns) + print("Number of features: ", features_number) + patterns = {x['code']: x['name'] for x in config['patterns']} + metrics = {x['code']: x['name'] for x in config['metrics']} + replace_dict = dict(patterns, **metrics) + return replace_dict + + @staticmethod + def get_table( + features_conf: Dict[Any, Any], + m: np.array, + p: np.array, + acts_complexity) -> pd.DataFrame: + """ + Prints results, given with `check_impact`. 
+ + + :param features_conf: features config of model + :param m: number of times when pattern was on first place, + if we decrease pattern by 1/ncss + :param p: number of times when pattern was on first place, + if we increase pattern by 1/ncss + :param acts_complexity: + + """ + + df = pd.DataFrame(columns=[ + 'pattern', ' -1(top1)', '+1(top1)', + 'p-c-', 'p+c+', 'p-c+', 'p+c-', 'p-c=', + 'p+c=']) + replace_dict = Stats.get_patterns_name() + for i in range(len(features_conf)): + top_minus = int(m[i]) + top_plus = int(p[i]) + p_minus_c_minus = int(acts_complexity[i, 0]) + p_plus_c_plus = int(acts_complexity[i, 4]) + p_minus_c_plus = int(acts_complexity[i, 1]) + p_plus_c_minus = int(acts_complexity[i, 3]) + p_minus_c_euq = int(acts_complexity[i, 2]) + p_plus_c_euq = int(acts_complexity[i, 5]) + pattern = replace_dict.get(features_conf[i]) + df = df.append({ + 'pattern': pattern, ' -1(top1)': top_minus, '+1(top1)': top_plus, + 'p-c-': p_minus_c_minus, 'p+c+': p_plus_c_plus, 'p-c+': p_minus_c_plus, + 'p+c-': p_plus_c_minus, 'p-c=': p_minus_c_euq, + 'p+c=': p_plus_c_euq + }, ignore_index=True) + + return df + + @staticmethod + def split_dataset_by_pattern_value( + X: np.array, + pattern_idx: int) -> Tuple[np.array, np.array]: + """ Divide dataset. + + :param X: dataset + :param pattern_idx: pattern index + :return: + 1st is dataset with pattern where pattern can be null, + 2nd is dataset with pattern where pattern is not null, + """ + nulls = [] + not_nulls = [] + for snipp in X: + if snipp[pattern_idx] == 0: + nulls.append(snipp) + else: + not_nulls.append(snipp) + + return np.array(nulls), np.array(not_nulls) + + @staticmethod + def change_matrix_by_value( + arr: np.array, + mask: np.array, + i: int, + incr: np.array): + """ + Args: + X: np.array with shape (number of snippets, number of patterns). + mask: np.array with shape (number of snippets, number of patterns). + i: int, 0 <= i < number of patterns. + incr: matrix values to add. + mask: matrix of bools + Returns: + X1: modified np.array with shape (number of snippets, number of patterns). + """ + + X1 = arr.copy() + X1[:, i] += incr[mask[:, i]] + + return X1 + + @staticmethod + def check_impact( + X: np.array, + model_input: Any): + """ + Args: + X: np.array with shape (number of snippets, number of patterns) or + (number of patterns, ). + model_input: model to use + Returns: + ranked: np.array with shape (number of snippets, number of patterns) + of sorted patterns in non-increasing order for each snippet of + code. + acts: np.array with shape (number of snippets, ) of + numbers of necessary actions for complexity's decrement. + 0 - do not modify the pattern, 1 - decrease by 1, 2 - increase by 1. + importances: importances of patters + acts_complexity: matrix with number of time for each pattern when: + pattern was increased, complexity increased, + pattern was increased, complexity decreased, etc. 
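+            For example (illustrative): for a snippet with ncss = 10, every
+            non-zero pattern value is perturbed by -1/10 and +1/10, complexity
+            is predicted for both variants, and the minimum of the original,
+            decreased and increased predictions selects the action (0, 1 or 2).
+            The columns of acts_complexity are ordered as
+            (p-c-, p-c+, p-c=, p+c-, p+c+, p+c=), the order consumed by get_table.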
+ """ + + if X.ndim == 1: + X = X.copy() + X = np.expand_dims(X, axis=0) + ncss = X[:, -1] + X = X[:, :-1] + + k = X.shape[1] + complexity = model_input.model.predict(X) + importances = np.zeros(X.shape) + actions = np.zeros(X.shape) + acts_complexity = np.zeros((X.shape[1], 6)) + for i in range(k): + nulls, not_nulls = Stats.split_dataset_by_pattern_value(X, i) + mask = not_nulls > 0 + dec_arr = Stats.change_matrix_by_value(not_nulls, mask, i, -1.0 / ncss[X[:, i] > 0]) + complexity_minus = model_input.model.predict(dec_arr) + incr_arr = Stats.change_matrix_by_value(not_nulls, mask, i, 1.0 / ncss[X[:, i] > 0]) + complexity_plus = model_input.model.predict(incr_arr) + c, number = get_minimum(complexity[X[:, i] > 0], complexity_minus, complexity_plus) + importances[:, i][X[:, i] > 0] = complexity[X[:, i] > 0] - c + actions[:, i][X[:, i] > 0] = number + acts_complexity[i, 0] += (complexity_minus < complexity[X[:, i] > 0]).sum() + acts_complexity[i, 1] += (complexity_minus > complexity[X[:, i] > 0]).sum() + acts_complexity[i, 2] += (complexity_minus == complexity[X[:, i] > 0]).sum() + acts_complexity[i, 3] += (complexity_plus < complexity[X[:, i] > 0]).sum() + acts_complexity[i, 4] += (complexity_plus > complexity[X[:, i] > 0]).sum() + acts_complexity[i, 5] += (complexity_plus == complexity[X[:, i] > 0]).sum() + + ranked = np.argsort(-1 * importances, 1) + acts = actions[np.argsort(ranked, 1) == 0] + return ranked, importances, acts_complexity, acts diff --git a/test/model/__init__.py b/aibolit/patterns/hybrid_constructor/__init__.py similarity index 100% rename from test/model/__init__.py rename to aibolit/patterns/hybrid_constructor/__init__.py diff --git a/aibolit/patterns/hybrid_constructor/hybrid_constructor.py b/aibolit/patterns/hybrid_constructor/hybrid_constructor.py new file mode 100644 index 00000000..901ef228 --- /dev/null +++ b/aibolit/patterns/hybrid_constructor/hybrid_constructor.py @@ -0,0 +1,115 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+from typing import Any, List + +from aibolit.ast_framework import AST, ASTNodeType, ASTNode +from aibolit.utils.ast_builder import build_ast + + +class HybridConstructor: + + def is_statement_ctor_inv(self, node: ASTNode) -> bool: + """Is statement explicit constructor invocation.""" + + if node.expression.node_type == ASTNodeType.EXPLICIT_CONSTRUCTOR_INVOCATION: + return True + else: + return False + + def traverse_in_if( + self, + val: ASTNode, + exp_ctrs_decls: List[ASTNode], + other_statements: List[ASTNode]) -> None: + """Traverse over if condition recursively to find + explicit constructor invocation.""" + if hasattr(val, 'statements'): + children = list(val.statements) + for i in children: + self.traverse(i, exp_ctrs_decls, other_statements) + + if hasattr(val, 'then_statement'): + self.traverse_in_if(val.then_statement, exp_ctrs_decls, other_statements) + other_statements.append(val.then_statement) + if hasattr(val, 'else_statement'): + self.traverse_in_if(val.else_statement, exp_ctrs_decls, other_statements) + other_statements.append(val.else_statement) + + def traverse( + self, + statement: ASTNode, + exp_ctrs_decls: List[ASTNode], + other_statements: List[ASTNode]) -> None: + """Traverse over AST recursively to find all explicit + constructor invocations and other statements.""" + + if statement.node_type == ASTNodeType.STATEMENT_EXPRESSION: + is_ctor_inv = self.is_statement_ctor_inv(statement) + if is_ctor_inv: + exp_ctrs_decls.append(statement) + else: + other_statements.append(statement) + elif statement.node_type == ASTNodeType.TRY_STATEMENT: + self.traverse_in_try(exp_ctrs_decls, other_statements, statement) + elif statement.node_type in ( + ASTNodeType.DO_STATEMENT, + ASTNodeType.WHILE_STATEMENT): + for i in statement.body.children: + self.traverse(i, exp_ctrs_decls, other_statements) + elif statement.node_type == ASTNodeType.FOR_STATEMENT: + for i in statement.body.children: + other_statements.append(statement) + self.traverse(i, exp_ctrs_decls, other_statements) + elif statement.node_type == ASTNodeType.IF_STATEMENT: + other_statements.append(statement) + self.traverse_in_if(statement.then_statement, exp_ctrs_decls, other_statements) + self.traverse_in_if(statement.else_statement, exp_ctrs_decls, other_statements) + else: + other_statements.append(statement) + + def traverse_in_try( + self, + exp_ctrs_decls: List[ASTNode], + other_statements: List[ASTNode], + statement: ASTNode) -> None: + """Check try statements and find different statements.""" + if (statement.resources is not None) or \ + (statement.catches is not None and statement.catches[0].block != []) or \ + (statement.finally_block is not None): + other_statements.append(statement) + for try_stat in statement.block: + self.traverse(try_stat, exp_ctrs_decls, other_statements) + + def value(self, filename: str) -> List[int]: + tree = AST.build_from_javalang(build_ast(filename)) + lines = [] + for node in tree.get_proxy_nodes(ASTNodeType.CONSTRUCTOR_DECLARATION): + exp_ctrs_decls: List[Any] = [] + other_statements: List[Any] = [] + for statement in node.body: + self.traverse(statement, exp_ctrs_decls, other_statements) + + if len(exp_ctrs_decls) > 0: + if len(other_statements) > 0: + lines.append(node.line) + + return lines diff --git a/aibolit/patterns/many_primary_ctors/many_primary_ctors.py b/aibolit/patterns/many_primary_ctors/many_primary_ctors.py index ab313e13..380bfc73 100644 --- a/aibolit/patterns/many_primary_ctors/many_primary_ctors.py +++ b/aibolit/patterns/many_primary_ctors/many_primary_ctors.py @@ 
-19,42 +19,37 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - -from typing import List - -import javalang.ast -import javalang.parse -import javalang.tree - -from aibolit.types_decl import LineNumber +from aibolit.ast_framework import ASTNodeType, AST from aibolit.utils.ast_builder import build_ast +from aibolit.ast_framework.ast_node import ASTNode +from typing import List, Union class ManyPrimaryCtors(object): - def value(self, filename: str): - tree = build_ast(filename) - - return self.__traverse_node(tree) - - def __traverse_node(self, tree: javalang.ast.Node) -> List[LineNumber]: - lines: List[LineNumber] = list() - - for _, class_declaration in tree.filter(javalang.tree.ClassDeclaration): - primary_ctors = list(filter(_is_primary, class_declaration.constructors)) - - if len(primary_ctors) > 1: - lines.extend(ctor.position.line for ctor in primary_ctors) - + ''' + If there is more than one primary + constructors in a class, it is + considered a pattern + ''' + def value(self, filename: str) -> List[int]: + lines: List[int] = list() + ast = AST.build_from_javalang(build_ast(filename)) + for class_declaration in ast.get_proxy_nodes(ASTNodeType.CLASS_DECLARATION): + primary_lines = self.__find_primary(ast, class_declaration.body) + if len(primary_lines) > 1: + lines.extend(primary_lines) return lines + def __find_primary(self, ast: AST, class_body: List[ASTNode]) -> List[int]: + lines: List[int] = [] + for node in class_body: + if self.__check_primary(ast, node): + lines.append(node.line) + return lines -def _is_primary(constructor: javalang.tree.ConstructorDeclaration) -> bool: - for _, assignment in constructor.filter(javalang.tree.Assignment): - if _is_instance_variable_assignment(assignment): - return True - - return False - - -def _is_instance_variable_assignment(assignment: javalang.tree.Assignment) -> bool: - return isinstance(assignment.expressionl, javalang.tree.This) + def __check_primary(self, ast: AST, node: Union[ASTNode, List[ASTNode]]) -> bool: + if isinstance(node, ASTNode) and node.node_type == ASTNodeType.CONSTRUCTOR_DECLARATION: + for assignment in ast.get_subtree(node).get_proxy_nodes(ASTNodeType.ASSIGNMENT): + if assignment.expressionl.node_type == ASTNodeType.THIS: + return True + return False diff --git a/aibolit/patterns/multiple_try/multiple_try.py b/aibolit/patterns/multiple_try/multiple_try.py index 0663b745..0c2317b2 100644 --- a/aibolit/patterns/multiple_try/multiple_try.py +++ b/aibolit/patterns/multiple_try/multiple_try.py @@ -1,77 +1,38 @@ -import javalang -from typing import List -import uuid -from collections import defaultdict -import hashlib -import itertools -from javalang.tree import FormalParameter - +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +from aibolit.ast_framework import ASTNodeType, AST from aibolit.utils.ast_builder import build_ast +from typing import List class MultipleTry: - - def __init__(self): - pass - - def traverse_node(self, node, dict_with_chains, uuid_method): - if not node: - return dict_with_chains - - for item in node.children: - if item and (isinstance(item, tuple) or isinstance(item, list)): - for j in item: - if isinstance(j, javalang.tree.MethodInvocation): - if not j.qualifier and j.qualifier != '': - # it means that there is method chaining - dict_with_chains[uuid_method].append([j.position.line, j.member]) - self.traverse_node(j, dict_with_chains, uuid_method) - else: - # it means that we have method invocation without chaining like - # result.add(field.getName(), column.columnName(), field.getType()); - new_uuid = uuid.uuid1() - dict_with_chains[new_uuid].append([j.position.line, j.member]) - self.traverse_node(j, dict_with_chains, new_uuid) - - elif isinstance(j, javalang.tree.MethodDeclaration): - self.traverse_node(j, dict_with_chains, str(uuid.uuid1())) - - elif isinstance(j, javalang.tree.StatementExpression): - self.traverse_node(j, dict_with_chains, uuid_method) - - elif isinstance(j, javalang.tree.This) or isinstance(j, javalang.tree.ClassCreator): - self.traverse_node(j, dict_with_chains, str(uuid.uuid1())) - elif isinstance(item, javalang.tree.ClassCreator): - self.traverse_node(item, dict_with_chains, uuid_method) - - return dict_with_chains - - - # flake8: noqa: C901 - def value(self, filename: str): - """ - Travers over AST tree and fins function with nested/sequential try statement - :param filename: - :return: - List of tuples with LineNumber and List of methods names, e.g. 
- [[10, 'func1'], [10, 'fun2']], [[23, 'run'], [23, 'start']]] - """ - - tree = build_ast(filename) - res = defaultdict(list) - for _, method_node in tree.filter(javalang.tree.MethodDeclaration): - for _, try_node in method_node.filter(javalang.tree.TryStatement): - formal_params = [ - (x.type.name + ' ' + x.name) - for x in method_node.parameters - if isinstance(x, FormalParameter) - ] - func_name = '{f}({params})'.format( - f=method_node.name, - params=','.join(formal_params) - ).encode('utf-8') - m = hashlib.md5() - m.update(func_name) - res[m.hexdigest()].append(method_node.position.line) - - return list(set(itertools.chain.from_iterable([y for x, y in res.items() if len(y) > 1]))) \ No newline at end of file + ''' + Check if a method contains more than one Try Statement + ''' + def value(self, filename) -> List[int]: + total_code_lines: List[int] = [] + ast = AST.build_from_javalang(build_ast(filename)) + for method_declaration in ast.get_proxy_nodes(ASTNodeType.METHOD_DECLARATION): + try_nodes = list(ast.get_subtree(method_declaration).get_proxy_nodes(ASTNodeType.TRY_STATEMENT)) + if len(try_nodes) > 1: + total_code_lines.append(method_declaration.line) + return total_code_lines diff --git a/aibolit/patterns/redundant_catch/redundant_catch.py b/aibolit/patterns/redundant_catch/redundant_catch.py index e37d5284..b57f366f 100644 --- a/aibolit/patterns/redundant_catch/redundant_catch.py +++ b/aibolit/patterns/redundant_catch/redundant_catch.py @@ -20,86 +20,45 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import itertools -from collections import defaultdict -from collections import namedtuple - -import javalang - -from aibolit.utils.java_parser import JavalangImproved - -ExceptionInfo = namedtuple('ExceptionInfo', 'func_name, catch_list, throws_list, line_number') +from aibolit.ast_framework import ASTNodeType, AST +from aibolit.utils.ast_builder import build_ast +from typing import List +from aibolit.ast_framework.ast_node import ASTNode class RedundantCatch: - """ - Find pattern when a method in Java class throws an exception, - and the exception of the same type is caught inside the method. 
- - E.g., - class Book { - void foo() throws IOException { - try { - Files.readAllBytes(); - } catch (IOException e) - { // here - // do something - } - } - } - Here, the method foo() throws IOException, but we catch it inside the method - """ - def __init__(self): - pass - - def value(self, filename): - """ - Find the mentioned-above pattern - :param filename: filename of Java file - :return: code lines of try statement where it was found - """ - total_code_lines = set() - obj = JavalangImproved(filename) - items = obj.tree_to_nodes() - try_nodes = defaultdict(list) - method_nodes = {} - for x in items: - # Line break occurred before a binary operator (W503) - # But this rule goes against the PEP 8 recommended style, so - # replace isinstanceof with variable - is_instance_meth_decl = isinstance(x.node, javalang.tree.MethodDeclaration) - is_instance_try_stat = isinstance(x.node, javalang.tree.TryStatement) - is_instance_ctor_decl = isinstance(x.node, javalang.tree.ConstructorDeclaration) - is_instance_lambda = isinstance(x.node, javalang.tree.LambdaExpression) - if is_instance_try_stat and x.method_line and not is_instance_lambda: - # If we do not have a line for method, we ignore this method - try_nodes[x.method_line].append(x) - elif (is_instance_meth_decl or is_instance_ctor_decl) and x.method_line and not is_instance_lambda: - # If we do not have a line for method, we ignore this method - method_nodes[x.method_line] = x + ''' + To check wether the method throws same as it does inside the + try -> catch structure in this method + ''' - for method_line, iter_nodes in sorted(try_nodes.items(), key=lambda x: x[1][0].line): - for try_node in iter_nodes: - method_node = method_nodes.get(method_line) + def _is_redundant(self, method_throw_name: List[str], try_node: ASTNode) -> bool: + assert try_node.node_type == ASTNodeType.TRY_STATEMENT + for catch_node in try_node.catches: + for catch_node_name in catch_node.parameter.types: + if catch_node_name in method_throw_name: + return True + return False - if not method_node or not method_node.node.throws: - continue + def _get_lambda_try_nodes(self, ast: AST, lambda_node: ASTNode) -> List[int]: + assert lambda_node.node_type == ASTNodeType.LAMBDA_EXPRESSION + return [try_node.line for try_node in + ast.get_subtree(lambda_node).get_proxy_nodes(ASTNodeType.TRY_STATEMENT)] - catch_list = [] - ei = ExceptionInfo( - func_name=method_node.node.name, - catch_list=catch_list, - throws_list=method_node.node.throws, - line_number=method_node.node.position.line - ) - if try_node.node.catches: - catch_classes = [x.parameter.types for x in try_node.node.catches] - classes_exception_list = list(itertools.chain(*catch_classes)) - ei.catch_list.extend(classes_exception_list) + def value(self, filename: str) -> List[int]: + lines: List[int] = [] + excluded_nodes: List[int] = [] + ast = AST.build_from_javalang(build_ast(filename)) - lines_number = set([ - try_node.line for c in ei.catch_list if c in ei.throws_list - ]) - total_code_lines.update(lines_number) + for method_declaration in ast.get_proxy_nodes(ASTNodeType.METHOD_DECLARATION, + ASTNodeType.CONSTRUCTOR_DECLARATION): + method_throw_names = method_declaration.throws + for try_node in ast.get_subtree(method_declaration).get_proxy_nodes(ASTNodeType.TRY_STATEMENT): + if method_throw_names is not None and \ + try_node.catches is not None and \ + self._is_redundant(method_throw_names, try_node): + lines.append(try_node.line) - return sorted(total_code_lines) + for lambda_node in 
ast.get_subtree(method_declaration).get_proxy_nodes(ASTNodeType.LAMBDA_EXPRESSION): + excluded_nodes.extend(self._get_lambda_try_nodes(ast, lambda_node)) + return sorted(list(set(lines).difference(set(excluded_nodes)))) diff --git a/aibolit/patterns/send_null/send_null.py b/aibolit/patterns/send_null/send_null.py index 109fed5a..f24b74a2 100644 --- a/aibolit/patterns/send_null/send_null.py +++ b/aibolit/patterns/send_null/send_null.py @@ -1,6 +1,6 @@ -import javalang -from typing import List +from typing import List, Any +from aibolit.ast_framework import ASTNodeType, AST from aibolit.utils.ast_builder import build_ast @@ -9,59 +9,32 @@ class SendNull: def __init__(self): pass - def __find_position_recursively(self, node): - if not hasattr(node, 'children'): - return - else: - for i in node.children: - if not isinstance(i, list): - if hasattr(i, '_position'): - if i.position: - return i._position.line - else: - for j in i.children: - position = self.__find_position_recursively(j) - if position: - return position - else: - for j in i: - position = self.__find_position_recursively(j) + def __is_null(self, val: Any) -> bool: + if not hasattr(val, 'value'): + return False + if not isinstance(val.value, str): + return False + if val.value != 'null': + return False + return True - return position - - # flake8: noqa - # after fix addition, errors are shown def value(self, filename: str) -> List[int]: - lst: List[int] = [] - tree = build_ast(filename) - - invocation_tree = tree.filter(javalang.tree.Invocation) - arg_list = [x for _, x in invocation_tree] - - for argument in arg_list: - ternary_list = argument.filter(javalang.tree.TernaryExpression) - for _, expr in ternary_list: - if isinstance(expr.if_false, javalang.tree.Literal) and expr.if_false.value == 'null': - if hasattr(argument, '_position'): - lst.append(argument._position.line) - else: - position = self.__find_position_recursively(expr) - lst.append(position) - if isinstance(expr.if_true, javalang.tree.Literal) and expr.if_true.value == 'null': - if hasattr(argument, '_position'): - lst.append(argument._position.line) - else: - position = self.__find_position_recursively(expr) - lst.append(position) - - for _, node in tree: - try: - for argument in node.arguments: - if isinstance(argument, javalang.tree.Literal) and argument.value == "null" and \ - argument._position.line not in lst: - lst.append(argument._position.line) - except (AttributeError, TypeError): - pass - lst = sorted(lst) + lines = set() + tree = AST.build_from_javalang(build_ast(filename)) + invocatios_types = [ + ASTNodeType.METHOD_INVOCATION, + ASTNodeType.EXPLICIT_CONSTRUCTOR_INVOCATION, + ASTNodeType.CLASS_CREATOR + ] + for node in tree.get_proxy_nodes(*invocatios_types): + for argument in node.arguments: + if (argument.node_type == ASTNodeType.LITERAL) and (argument.value == "null"): + lines.add(argument.line) + + for node in tree.get_proxy_nodes(ASTNodeType.TERNARY_EXPRESSION): + if self.__is_null(node.if_false) or self.__is_null(node.if_true): + lines.add(node.line) + + lst = sorted(lines) return lst diff --git a/aibolit/patterns/string_concat/string_concat.py b/aibolit/patterns/string_concat/string_concat.py index 59ff033e..1129fd8e 100644 --- a/aibolit/patterns/string_concat/string_concat.py +++ b/aibolit/patterns/string_concat/string_concat.py @@ -1,54 +1,47 @@ -from typing import Tuple, Dict, List -from typing import Tuple, Dict, List -import javalang - -from aibolit.types_decl import LineNumber +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# 
Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +from aibolit.ast_framework import ASTNodeType, AST from aibolit.utils.ast_builder import build_ast -from aibolit.utils.utils import RemoveComments +from typing import Set, List +from aibolit.ast_framework.ast_node import ASTNode class StringConcatFinder: - - def __init__(self): - pass - - # flake8: noqa: C901 - def value(self, filename: str) -> List[LineNumber]: - - lines = set() - text = build_ast(filename) - - for _, node in text.filter(javalang.tree.BinaryOperation): - if node.operator == '+': - is_l_literal = isinstance(node.operandl, javalang.tree.Literal) - is_r_literal = isinstance(node.operandr, javalang.tree.Literal) - is_r_member = isinstance(node.operandr, javalang.tree.MemberReference) - is_l_member = isinstance(node.operandl, javalang.tree.MemberReference) - is_l_meth_inv = isinstance(node.operandl, javalang.tree.MethodInvocation) - is_r_meth_inv = isinstance(node.operandr, javalang.tree.MethodInvocation) - is_l_this = isinstance(node.operandl, javalang.tree.This) - is_r_this = isinstance(node.operandr, javalang.tree.This) - if is_l_literal and (is_r_member or is_r_meth_inv or is_r_this): - is_string_literal = '"' in node.operandl.value # type: ignore - if is_string_literal: - if node.operandl.position: - lines.add(node.operandl.position.line) - elif node.operandr.position: - lines.add(node.operandr.position.line) - elif hasattr(node.operandl, '_position'): - lines.add(node.operandl._position.line) - elif hasattr(node.operandr, '_position'): - lines.add(node.operandr._position.line) - elif is_r_literal and (is_l_member or is_l_meth_inv or is_l_this): - is_string_literal = '"' in node.operandr.value # type: ignore - if is_string_literal: - if node.operandl.position: - lines.add(node.operandl.position.line) - elif node.operandr.position: - lines.add(node.operandr.position.line) - elif hasattr(node.operandr, '_position'): - lines.add(node.operandr._position.line) - elif hasattr(node.operandl, '_position'): - lines.add(node.operandl._position.line) + ''' + Any usage string concatenation using '+' operator is considered as a pattern. 
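+    For example (illustrative), `"id: " + value` and `name + "!"` are reported,
+    while an integer literal such as `1 + x` is skipped, because digit-only
+    literal values are filtered out.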
+ ''' + def _check_left_right_operator(self, node: ASTNode) -> bool: + assert node.node_type == ASTNodeType.BINARY_OPERATION + for operator_side in [node.operandr, node.operandl]: + if operator_side.node_type == ASTNodeType.LITERAL and isinstance(operator_side.value, str) and \ + not operator_side.value.isdigit(): + return True + return False + + def value(self, filename: str) -> List[int]: + lines: Set[int] = set() + ast = AST.build_from_javalang(build_ast(filename)) + for node in ast.get_proxy_nodes(ASTNodeType.BINARY_OPERATION): + if node.operator == '+' and self._check_left_right_operator(node): + lines.add(node.line) return sorted(lines) diff --git a/aibolit/patterns/this_finder/this_finder.py b/aibolit/patterns/this_finder/this_finder.py deleted file mode 100644 index bba76d9e..00000000 --- a/aibolit/patterns/this_finder/this_finder.py +++ /dev/null @@ -1,121 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2020 Aibolit -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import javalang - -from aibolit.utils.ast_builder import build_ast - - -class ThisFinder: - - def __expr_stat(self, expr, flag_this, flag_else): - '''function to work with StatementExpression block''' - if isinstance(expr.expression, javalang.tree.ExplicitConstructorInvocation): - if flag_this + flag_else > 0: - return 1, flag_this, flag_else - flag_this = 1 - else: - if flag_this > 0: - return 1, flag_this, flag_else - flag_else = 1 - return 0, flag_this, flag_else - - def __try_stat(self, expr, flag_this, flag_else): - '''function to work with TryStatement block''' - if (expr.resources is not None) or \ - (expr.catches is not None and expr.catches[0].block != []) or \ - (expr.finally_block is not None): - flag_else = 1 - try_exprs = expr.block - for expr1 in try_exprs: - if isinstance(expr1, javalang.tree.StatementExpression): - res, flag_this, flag_else = self.__expr_stat(expr1, flag_this, flag_else) - if res > 0: - return 1, flag_this, flag_else - else: - if flag_this > 0: - return 1, flag_this, flag_else - flag_else = 1 - flag_else = 1 - return 0, flag_this, flag_else - - def __if_stat(self, expr, flag_this, flag_else): - '''function to work with IfStatement block''' - if expr.then_statement is not None: - if hasattr(expr.then_statement, 'statements'): - stmts = expr.then_statement.statements - else: - stmts = [] - res, flag_this, flag_else = self.__work_with_stats(stmts, flag_this, flag_else) - if res > 0: - return 1, flag_this, flag_else - if expr.else_statement is not None: - if isinstance(expr.else_statement, javalang.tree.IfStatement): - res, flag_this, flag_else = self.__if_stat(expr.else_statement, flag_this, flag_else) - if res > 0: - return 1, flag_this, flag_else - return 0, flag_this, flag_else - block = expr.else_statement - res, flag_this, flag_else = self.__work_with_stats(block, flag_this, flag_else) - if res > 0: - return 1, flag_this, flag_else - return 0, flag_this, flag_else - - # flake8: noqa - def __work_with_stats(self, stats, flag_this, flag_else): - '''function to work with objects in constructor''' - for expr in stats: - res = 0 - old_else = flag_else - flag_else = 1 - if isinstance(expr, javalang.tree.TryStatement): - res, flag_this, flag_else = self.__try_stat(expr, flag_this, old_else) - elif isinstance(expr, javalang.tree.StatementExpression): - res, flag_this, flag_else = self.__expr_stat(expr, flag_this, old_else) - elif isinstance(expr, javalang.tree.IfStatement): - res, flag_this, flag_else = self.__if_stat(expr, flag_this, flag_else) - elif isinstance(expr, javalang.tree.ForStatement): - if hasattr(expr.body, 'statements'): - res, flag_this, flag_else = self.__work_with_stats(expr.body.statements, flag_this, flag_else) - elif isinstance(expr, javalang.tree.WhileStatement): - if hasattr(expr.body, 'statements'): - res, flag_this, flag_else = self.__work_with_stats(expr.body.statements, flag_this, flag_else) - elif isinstance(expr, javalang.tree.DoStatement): - if hasattr(expr.body, 'statements'): - res, flag_this, flag_else = self.__work_with_stats(expr.body.statements, flag_this, flag_else) - else: - res = flag_this - if res > 0: - return 1, flag_this, flag_else - return 0, flag_this, flag_else - - def value(self, filename: str): - '''main function''' - tree = build_ast(filename) - num_str = [] - for _, node in tree.filter(javalang.tree.ConstructorDeclaration): - number = node.position.line - stats = node.children[-1] - result, _, _ = self.__work_with_stats(stats, 0, 0) - if result == 1: - num_str.append(number) - return sorted(list(set(num_str))) 
diff --git a/scripts/02-filter-and-move.py b/scripts/02-filter-and-move.py index fe02697f..d92e0247 100644 --- a/scripts/02-filter-and-move.py +++ b/scripts/02-filter-and-move.py @@ -47,7 +47,7 @@ '--max_classes', type=lambda v: sys.maxsize if v == '' else int(v), required=False, - default=None + default=sys.maxsize ) parser.add_argument( '--split_only', @@ -60,12 +60,18 @@ MAX_CLASSES = args.max_classes TXT_OUT = 'found-java-files.txt' CSV_OUT = '02-java-files.csv' - -DIR_TO_CREATE = 'target/02' -FILE_TO_SAVE = '02-java-files.csv' current_location: str = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__)) ) +target_folder = os.getenv('TARGET_FOLDER') +if target_folder: + Path(target_folder).mkdir(parents=True, exist_ok=True) +else: + target_folder = str(Path(current_location).absolute()) + +print(f'Target folder: {target_folder}') +DIR_TO_CREATE = Path(target_folder, 'target/02') +FILE_TO_SAVE = '02-java-files.csv' class ClassType(Enum): @@ -175,7 +181,7 @@ def walk_in_parallel(): queue = manager.Queue() for i in scantree(args.dir): - queue.put(Path(i)) + queue.put(Path(i).absolute()) cancel = Value(c_bool, False) counter = Counter(0) @@ -204,8 +210,8 @@ def call_back(): if __name__ == '__main__': - path_csv_out = str(Path(current_location, DIR_TO_CREATE, CSV_OUT)) - path_txt_out = str(Path(current_location, DIR_TO_CREATE, TXT_OUT)) + path_csv_out = str(Path(DIR_TO_CREATE, CSV_OUT)) + path_txt_out = str(Path(DIR_TO_CREATE, TXT_OUT)) if not args.split_only: start = time.time() @@ -217,12 +223,12 @@ def call_back(): df = pd.DataFrame(results, columns=['filename', 'class_type']) df = df[df['class_type'] == 999] df.to_csv(path_csv_out, index=False) - df['filename'].to_csv(path_txt_out, header=None, index=None) + df['filename'].to_csv(path_txt_out, header=None, index=None, encoding='utf-8') end = time.time() print('It took ' + str(end - start) + ' seconds') df = pd.read_csv(path_csv_out) train, test = train_test_split(df['filename'], test_size=0.3, random_state=42) - train_csv_file = str(Path(current_location, DIR_TO_CREATE, '02-train.csv')) - test_csv_file = str(Path(current_location, DIR_TO_CREATE, '02-test.csv')) + train_csv_file = str(Path(DIR_TO_CREATE, '02-train.csv')) + test_csv_file = str(Path(DIR_TO_CREATE, '02-test.csv')) train.to_csv(train_csv_file, index=False) test.to_csv(test_csv_file, index=False) diff --git a/scripts/04-find-patterns.py b/scripts/04-find-patterns.py index 5042a6dd..b0bc2bdc 100644 --- a/scripts/04-find-patterns.py +++ b/scripts/04-find-patterns.py @@ -43,7 +43,14 @@ required=True) args = parser.parse_args() -dir_path = os.path.dirname(os.path.realpath(__file__)) +current_location: str = os.path.realpath( + os.path.join(os.getcwd(), os.path.dirname(__file__)) +) +target_folder = os.getenv('TARGET_FOLDER') +if target_folder: + os.chdir(target_folder) +else: + target_folder = os.path.dirname(os.path.realpath(__file__)) def log_result(result, file_to_write): @@ -104,7 +111,7 @@ def execute_python_code_in_parallel_thread(exceptions, file_absolute_path): # flake8: noqa: C901 def write_log_error(exceptions): - errors_log_path = str(Path(dir_path, 'errors.csv')).strip() + errors_log_path = str(Path(target_folder, 'errors.csv')).strip() exp_sorter = defaultdict(set) exp_number = defaultdict(int) if exceptions: @@ -117,12 +124,12 @@ def write_log_error(exceptions): exp_sorter[ex['pattern_name']].add(ex['exc_type']) exp_number[ex['pattern_name']] += 1 - dir_log_to_create = Path(dir_path, 'log') + dir_log_to_create = Path(target_folder, 'log') if not 
dir_log_to_create.exists(): dir_log_to_create.mkdir(parents=True) - exceptions_number_path = str(Path(dir_path, 'log/exceptions_number.csv')).strip() - exceptions_unique_path = str(Path(dir_path, 'log/exceptions_unique.csv')).strip() + exceptions_number_path = str(Path(target_folder, 'log/exceptions_number.csv')).strip() + exceptions_unique_path = str(Path(target_folder, 'log/exceptions_unique.csv')).strip() with open(exceptions_unique_path, 'w', newline='') as myfile: writer = csv.writer(myfile) @@ -135,10 +142,10 @@ def write_log_error(exceptions): writer.writerow([pattern, number]) filenames_w_errs = list(exc_dict.keys()) - dir_to_create = Path(dir_path, 'log/files') + dir_to_create = Path(target_folder, 'log/files') if not dir_to_create.exists(): dir_to_create.mkdir(parents=True) - files_with_errors = str(Path(dir_path, 'log/files/files_with_exceptions.txt')).strip() + files_with_errors = str(Path(target_folder, 'log/files/files_with_exceptions.txt')).strip() with open(files_with_errors, 'w') as myfile: myfile.writelines(filenames_w_errs) @@ -155,7 +162,7 @@ def write_log_error(exceptions): if copied_files: try: - tar_filename = str(Path(dir_path, 'log/files.tar.gz')) + tar_filename = str(Path(target_folder, 'log/files.tar.gz')) cmd = ['tar', '-czvf', tar_filename, str(dir_to_create.absolute())] output = subprocess.check_output(cmd).decode("utf-8").strip() print(output) diff --git a/scripts/05-calculate-rs.py b/scripts/05-calculate-rs.py index dbe6eba3..0a565e4b 100644 --- a/scripts/05-calculate-rs.py +++ b/scripts/05-calculate-rs.py @@ -42,6 +42,10 @@ args = parser.parse_args() dir_path = os.path.dirname(os.path.realpath(__file__)) +target_folder = Path(os.getenv('TARGET_FOLDER')) +if target_folder: + os.chdir(str(target_folder)) + results = {} path = 'target/05' os.makedirs(path, exist_ok=True) diff --git a/scripts/06-calculate-halstead.py b/scripts/06-calculate-halstead.py index 1206634e..60f2eaee 100644 --- a/scripts/06-calculate-halstead.py +++ b/scripts/06-calculate-halstead.py @@ -49,6 +49,7 @@ args = parser.parse_args() dir_path = os.path.dirname(os.path.realpath(__file__)) +target_folder = Path(os.getenv('TARGET_FOLDER')) results = {} path = 'target/06' @@ -85,6 +86,8 @@ def call_proc(cmd, java_file): handled_files = [] count = 0 max_count = args.max_count + halstead_location = str(Path(dir_path, 'halstead.jar')) + print(f'halstead location: {halstead_location}') print(max_count) with open(args.filename, 'r') as f: for i in f.readlines(): @@ -92,7 +95,7 @@ def call_proc(cmd, java_file): java_file = str(Path(dir_path, i)).strip() pool.apply_async( call_proc, - args=(['java', '-jar', 'halstead.jar', java_file], i,), + args=(['java', '-jar', halstead_location, java_file], i,), callback=log_result) count += 1 else: diff --git a/scripts/07-merge.py b/scripts/07-merge.py index db754f14..9f4393f8 100644 --- a/scripts/07-merge.py +++ b/scripts/07-merge.py @@ -1,10 +1,12 @@ import pandas as pd +import os -df_pmd_metrics = pd.read_csv('./target/03/pmd_metrics.csv', sep=';').set_index('filename') -df_patterns = pd.read_csv('./target/04/04-find-patterns.csv', sep=';').set_index('filename') -df_halstead = pd.read_csv('./target/06/06_halstead_volume.csv', sep=';').set_index('filename') +current_location: str = os.path.realpath( + os.path.join(os.getcwd(), os.path.dirname(__file__)) +) +target_folder = os.getenv('TARGET_FOLDER') +if target_folder: + os.chdir(target_folder) -first_df = df_pmd_metrics.join(df_patterns, how='inner') -halstead = first_df.join(df_halstead, how='inner') 
-print(halstead.head()) -halstead.to_csv('./target/dataset.csv') +df_patterns = pd.read_csv('./target/04/04-find-patterns.csv', sep=';').set_index('filename') +df_patterns.to_csv('./target/dataset.csv') diff --git a/scripts/08-split.py b/scripts/08-split.py new file mode 100644 index 00000000..6419dedd --- /dev/null +++ b/scripts/08-split.py @@ -0,0 +1,53 @@ +import os +from pathlib import Path + +import pandas as pd + + +def preprocess_file(filename: str): + print('reading dataset from {}'.format(filename)) + df = pd.read_csv(filename, index_col=0) + df = df[~df["filename"].str.lower().str.contains("test")] + df = df.dropna().drop_duplicates(subset=df.columns.difference(['filename'])) + df = df[(df.M2 > 20) & (df.M2 < 100)].copy() + return df + + +if __name__ == '__main__': + target_folder = os.getenv('TARGET_FOLDER') + if target_folder: + os.chdir(target_folder) + + current_location: str = os.path.realpath( + os.path.join(os.getcwd(), os.path.dirname(__file__)) + ) + dir_to_create = 'target/08' + + train_filenames = list(pd.read_csv(Path(current_location, 'target/02/02-train.csv'))['filename']) + test_filenames = list(pd.read_csv(Path(current_location, 'target/02/02-test.csv'))['filename']) + train_size = len(train_filenames) + test_size = len(test_filenames) + total_elems = train_size + test_size + print('{} train elems ({}%) and {} test elems test ({}%) of all dataset'.format( + train_size, train_size / total_elems, + test_size, test_size / total_elems)) + df = pd.read_csv(str(Path(current_location, './target/dataset.csv'))) + train = df[df['filename'].isin(train_filenames)] + test = df[df['filename'].isin(test_filenames)] + train.to_csv('train_temp.csv') + test.to_csv('test_temp.csv') + train_preprocessed = preprocess_file('train_temp.csv') + test_preprocessed = preprocess_file('test_temp.csv') + total_size = (train_preprocessed.shape[0] + test_preprocessed.shape[0]) + print('{} train elems ({}%) and {} test elems test ({}%) of processed dataset'.format( + train_preprocessed.shape[0], train_preprocessed.shape[0] / total_size, + test_preprocessed.shape[0], test_preprocessed.shape[0] / total_size)) + Path('train_temp.csv').unlink() + Path('test_temp.csv').unlink() + path_to_create = Path(dir_to_create) + if not path_to_create.exists(): + path_to_create.mkdir(parents=True) + train_csv_path = Path(path_to_create, '08-train.csv') + test_csv_path = Path(path_to_create, '08-test.csv') + train_preprocessed.to_csv(train_csv_path, encoding='utf-8') + test_preprocessed.to_csv(test_csv_path, encoding='utf-8') diff --git a/scripts/Makefile b/scripts/Makefile index fdf00325..adee0fd5 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -1,4 +1,4 @@ -all: requirements samples filter metrics patterns rs build_halstead hl merge +all: requirements samples filter patterns merge split # Fetch thousands of Java samples in GitHub and store them # all in target/01 directory. We don't filter anything at @@ -19,7 +19,7 @@ endif # be stored in target/02-java-files.csv path_to_java_files := $(if $(dir),$(dir), target/01) filter: - python3 ./02-filter-and-move.py --dir=${path_to_java_files} --max_classes=${max_classes} + python3 ./02-filter-and-move.py --dir ${path_to_java_files} --max_classes=${max_classes} # Here we go through the list of all Java classes in target/02, # and calculate a few metrics per each of them. 
The result of @@ -37,7 +37,7 @@ filter: path_to_java_files := $(if $(dir),$(dir), target/01) metrics: ./_tmp/pmd-bin/bin/run.sh - python3 ./03-calculate-metrics.py --dir=${path_to_java_files} + python3 ./03-calculate-metrics.py --dir ${path_to_java_files} # Here we go through the list of all Java classes in target/02 # and attempt to find patterns there. We produce a summary @@ -68,3 +68,6 @@ merge: clean: rm -rf target _tmp + +split: + python3 08-split.py diff --git a/test/integration/all.py b/test/integration/all.py index a9ca5b51..854c6535 100644 --- a/test/integration/all.py +++ b/test/integration/all.py @@ -29,6 +29,7 @@ from aibolit.config import Config +# TODO: fix all errors in the patterns/metrics and make these lists empty EXCLUDE_PATTERNS: List[str] = [] EXCLUDE_METRICS: List[str] = [] diff --git a/test/metrics/RFC/test_all_types.py b/test/integration/test_model.py similarity index 53% rename from test/metrics/RFC/test_all_types.py rename to test/integration/test_model.py index 703e9c90..2a7bde58 100644 --- a/test/metrics/RFC/test_all_types.py +++ b/test/integration/test_model.py @@ -19,33 +19,33 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - -from aibolit.metrics.RFC.rfc import RFC import os -from unittest import TestCase +import shutil from pathlib import Path - - -class TestNum_MethodsandVars(TestCase): - dir_path = Path(os.path.realpath(__file__)).parent - get_rfc = RFC() - - def test1(self): - lines = self.get_rfc.value(Path(self.dir_path, '1.java')) - self.assertEqual(lines, 6) - - def test2(self): - lines = self.get_rfc.value(Path(self.dir_path, '2.java')) - self.assertEqual(lines, 11) - - def test3(self): - lines = self.get_rfc.value(Path(self.dir_path, '3.java')) - self.assertEqual(lines, 17) - - def test4(self): - lines = self.get_rfc.value(Path(self.dir_path, '4.java')) - self.assertEqual(lines, 7) - - def test5(self): - lines = self.get_rfc.value(Path(self.dir_path, '5.java')) - self.assertEqual(lines, 4) +from time import time + +from aibolit.config import Config +from aibolit.model.model import PatternRankingModel, scale_dataset, generate_fake_dataset + + +def test_model_training(): + cur_file_dir = Path(os.path.realpath(__file__)).parent + config = Config.get_patterns_config() + model = PatternRankingModel() + patterns = [x['code'] for x in config['patterns']] + train_df = generate_fake_dataset() + model.features_conf = {'features_order': patterns} + scaled_df = scale_dataset(train_df, model.features_conf) + start = time() + print('Start training...') + model.fit_regressor(scaled_df[patterns], scaled_df['M4']) + end = time() + print('End training. 
Elapsed time: {:.2f} secs'.format(end - start)) + # this folder is created by catboost library, impossible to get rid of it + catboost_folder = Path(cur_file_dir, 'catboost_info') + if catboost_folder.exists(): + shutil.rmtree(catboost_folder) + + +if __name__ == '__main__': + test_model_training() diff --git a/test/metrics/RFC/1.java b/test/metrics/RFC/1.java deleted file mode 100644 index 9f25256c..00000000 --- a/test/metrics/RFC/1.java +++ /dev/null @@ -1,19 +0,0 @@ -public class MyClass { - static void myMethod() { // +1 - System.out.println("Hello World!"); // +1 - } - - public static void main(String[] args) { // +0 because of using only other methods - - myMethod(); - } - - public static void main1(String[] args) { // +1 - double num1 = getNumber(); // +1 - double num2 = getNumber(); - char operation = getOperation(); // +1 - double result = calc(num1, num2, operation); // +1 - System.out.println("Результат:" + result); - } - - } \ No newline at end of file diff --git a/test/metrics/RFC/2.java b/test/metrics/RFC/2.java deleted file mode 100644 index a657eae2..00000000 --- a/test/metrics/RFC/2.java +++ /dev/null @@ -1,83 +0,0 @@ -import java.util.Scanner; - -public class Calculator { - public static void main(String[] args) { // +1 - double num1 = getNumber(); - double num2 = getNumber(); - char operation = getOperation(); - double result = calc(num1, num2, operation); - System.out.println("Результат:" + result); // +1 - } - - public static double getNumber() { // +1 - Scanner sc = new Scanner(System.in); // +1 - System.out.println("Введите число:"); - if(sc.hasNextDouble()) { // +1 - return sc.nextDouble(); // +1 - } else { - System.out.println("Ошибка при вводе. Повторите ввод"); - return getNumber(); - } - } - - public static char getOperation() { // +1 - Scanner sc = new Scanner(System.in); - System.out.println("Выберите номер операции:\n1 - прибавить\n2 - отнять\n3 - умножить\n4 - разделить"); - int operationNumber = 0; - if(sc.hasNextInt()) { // +1 - operationNumber = sc.nextInt(); - } else { - System.out.println("Вы ввели не число! Повторите ввод!"); - return getOperation(); - } - switch (operationNumber) { - case 1: - return '+'; - case 2: - return '-'; - case 3: - return '*'; - case 4: - return '/'; - default: - System.out.println("Неправильная операция! Повторите ввод!"); - return getOperation(); - } - } - - public static double add(double num1, double num2) { // +1 - return num1+num2; - } - - public static double sub(double num1, double num2) { // +1 - return num1-num2; - } - - public static double mul(double num1, double num2) { // +1 - return num1*num2; - } - - public static double div(double num1, double num2) { // +1 - if(num2 != 0.0) { - return num1/num2; - } else { - System.out.println("На 0 делить нельзя!"); - return Double.NaN; - } - } - - public static double calc(double num1, double num2, char operation) { // +1 - switch (operation) { - case '+': - return add(num1, num2); - case '-': - return sub(num1, num2); - case '*': - return mul(num1, num2); - case '/': - return div(num1, num2); - default: - return Double.NaN; - } - } -} \ No newline at end of file diff --git a/test/metrics/RFC/3.java b/test/metrics/RFC/3.java deleted file mode 100644 index 4e38f3e2..00000000 --- a/test/metrics/RFC/3.java +++ /dev/null @@ -1,118 +0,0 @@ -import javax.validation.constraints.NotNull; - -/** - * Routine. - * - * @author Yegor Bugayenko (yegor256@gmail.com) - * @version $Id$ - * @since 1.50 - * @todo #1125:30min Routine should be delegate execution to separate threads. 
- * Currently com.rultor.Routine#process() is sequentially processing all Talks - * and breaking out of this sequential processing to log occurring exceptions. - * This leads to issues in one build breaking all builds globally. - * This should be reworked to run the chain of Agents for each talk in a - * separate thread, not interfering with the main Routine in error cases. - * Once this is done the swallowing of generic exceptions, added to - * circumvent this issue, in - * com.rultor.agents.github.Reports#process(com.jcabi.xml.XML) should be - * removed. - */ -@ScheduleWithFixedDelay(delay = 1, unit = TimeUnit.MINUTES, threads = 1) -@SuppressWarnings("PMD.DoNotUseThreads") -final class Routine implements Runnable, Closeable { - - /** - * Shutting down? - */ - private final transient AtomicBoolean down = new AtomicBoolean(); - - /** - * When I started. - */ - private final transient long start = System.currentTimeMillis(); // +1 - - /** - * Ticks. - */ - private final transient Pulse pulse; - - /** - * Talks. - */ - private final transient Talks talks; - - /** - * Agents. - */ - private final transient Agents agents; - - /** - * Ctor. - * @param tlks Talks - * @param pls Pulse - * @param github Github client - * @param sttc Sttc client - * @checkstyle ParameterNumberCheck (4 lines) - */ - Routine(@NotNull final Talks tlks, final Pulse pls, - final Github github, final Sttc sttc) { - this.talks = tlks; - this.pulse = pls; - this.agents = new Agents(github, sttc); - } // +1 - - @Override - public void close() { // +1 - this.down.set(true); // +1 - } - - @Override - @SuppressWarnings("PMD.AvoidCatchingThrowable") - public void run() { // +1 - Logger.info( // +1 - this, "%d active talks, alive for %[ms]s: %tc", - this.safe(), - System.currentTimeMillis() - this.start, new Date() - ); - this.pulse.error(Collections.emptyList()); // +2 - } - - /** - * Routine every-minute proc. - * @return Total talks processed - * @throws IOException If fails - */ - @Timeable(limit = Tv.TWENTY, unit = TimeUnit.MINUTES) - private int safe() throws IOException { // +1 - final long begin = System.currentTimeMillis(); - int total = 0; - if (new Toggles.InFile().readOnly()) { // +1 - Logger.info(this, "read-only mode"); - } else { - total = this.process(); - } - this.pulse.add( // +1 - new Tick(begin, System.currentTimeMillis() - begin, total) - ); - return total; - } - - /** - * Routine every-minute proc. 
- * @return Total talks processed - * @throws IOException If fails - */ - private int process() throws IOException { // +1 - this.agents.starter().execute(this.talks); // +1 - final Profiles profiles = new Profiles(); - int total = 0; - for (final Talk talk : this.talks.active()) { // +1 - ++total; - final Profile profile = profiles.fetch(talk); // +1 - this.agents.agent(talk, profile).execute(talk); // +1 - } - this.agents.closer().execute(this.talks); // +1 - return total; - } - -} \ No newline at end of file diff --git a/test/metrics/RFC/4.java b/test/metrics/RFC/4.java deleted file mode 100644 index b8c401f7..00000000 --- a/test/metrics/RFC/4.java +++ /dev/null @@ -1,20 +0,0 @@ -public class MyClass { - static void myMethod() { // +1 - System.out.println("Hello World!"); // +1 - } - - public static void main(String[] args) { // +1 - - myMethod(); - } - - - public static void main1(String[] args) { // +1 - double num1 = getNumber(); // +1 - double num2 = getNumber(); - char operation = getOperation(); // +1 - double result = calc(num1, num2, operation); // +1 - System.out.println("Результат:" + calc1(num1, num2, operation)); // +1 - } - - } diff --git a/test/metrics/RFC/5.java b/test/metrics/RFC/5.java deleted file mode 100644 index b0bccc31..00000000 --- a/test/metrics/RFC/5.java +++ /dev/null @@ -1,17 +0,0 @@ -public class TEST { - static int square(int a){ // +1 - int result = a*a; - return result; - } - static int square1(int a){ // +0 because it is depend on previous method - int result = square(a); - return result; - } - - - public static void main(String[] args) { // +1 - - System.out.println(sq(square(2))); // +2 - } - -} diff --git a/test/metrics/RFC/EmptyPublicAndPrivateMethods.java b/test/metrics/RFC/EmptyPublicAndPrivateMethods.java new file mode 100644 index 00000000..153788f1 --- /dev/null +++ b/test/metrics/RFC/EmptyPublicAndPrivateMethods.java @@ -0,0 +1,15 @@ +// Total RFC = 2 + +public class FirstClass { + public void firstEmptyPublicMethod() { // +1 for public method + } + + public void secondEmptyPublicMethod() { // +1 for public method + } + + private void firstEmptyPrivateMethod() { + } + + private void secondEmptyPrivateMethod() { + } +} diff --git a/test/metrics/RFC/EmptyPublicMethods.java b/test/metrics/RFC/EmptyPublicMethods.java new file mode 100644 index 00000000..2373da51 --- /dev/null +++ b/test/metrics/RFC/EmptyPublicMethods.java @@ -0,0 +1,9 @@ +// Total RFC = 2 + +public class FirstClass { + public void firstEmptyPublicMethod() { // +1 for public method + } + + public void secondEmptyPublicMethod() { // +1 for public method + } +} diff --git a/test/metrics/RFC/InheritedMethods.java b/test/metrics/RFC/InheritedMethods.java new file mode 100644 index 00000000..ba9d9237 --- /dev/null +++ b/test/metrics/RFC/InheritedMethods.java @@ -0,0 +1,12 @@ +// Total RFC = 2 + +public class BaseClass { // class RFC = 1 + public void baseMethod() { // +1 for public method + } +} + +public class DerivedClass extends BaseClass { // class RFC = 1 + public void publicMethod() { // +1 for public method + baseMethod(); // calling inherited method do not count + } +} diff --git a/test/metrics/RFC/InvocationOfLocalAndOuterMethodsWithSameName.java b/test/metrics/RFC/InvocationOfLocalAndOuterMethodsWithSameName.java new file mode 100644 index 00000000..5a309644 --- /dev/null +++ b/test/metrics/RFC/InvocationOfLocalAndOuterMethodsWithSameName.java @@ -0,0 +1,14 @@ +// Total RFC = 4 + +public class FirstClass { + public void firstPublicMethod() { // +1 for public method + 
System.out.println("Hello World!"); // +1 for invocation (outer method) + } + + public void secondPublicMethod() { // +1 for public method + println(); // +1 for invocation (local method) + } + + private void println() { + } +} diff --git a/test/metrics/RFC/OverwrittenInheritedMethods.java b/test/metrics/RFC/OverwrittenInheritedMethods.java new file mode 100644 index 00000000..bb0759dd --- /dev/null +++ b/test/metrics/RFC/OverwrittenInheritedMethods.java @@ -0,0 +1,12 @@ +// Total RFC = 2 + +public class BaseClass { // class RFC = 1 + public void publicMethod() { // +1 for public method + } +} + +public class DerivedClass extends BaseClass { // class RFC = 1 + public void publicMethod() { // +1 for public method + super.publicMethod(); // calling inherited method do not count + } +} diff --git a/test/metrics/RFC/PublicMethodsInvokeOuterMethods.java b/test/metrics/RFC/PublicMethodsInvokeOuterMethods.java new file mode 100644 index 00000000..ad4f2edc --- /dev/null +++ b/test/metrics/RFC/PublicMethodsInvokeOuterMethods.java @@ -0,0 +1,10 @@ +// Total RFC = 3 + +public class FirstClass { + public void firstPublicMethod() { // +1 for public method + System.out.println("Hello World!"); // +1 for invocation + } + + public void secondPublicMethod() { // +1 for public method + } +} diff --git a/test/metrics/RFC/PublicMethodsInvokePrivateMethods.java b/test/metrics/RFC/PublicMethodsInvokePrivateMethods.java new file mode 100644 index 00000000..3b7d3d34 --- /dev/null +++ b/test/metrics/RFC/PublicMethodsInvokePrivateMethods.java @@ -0,0 +1,17 @@ +// Total RFC = 3 + +public class FirstClass { + public void firstPublicMethod() { // +1 for public method + firstPrivateMethod(); // +1 for invocation + } + + public void secondPublicMethod() { // +1 for public method + } + + private void firstPrivateMethod() { + secondPrivateMethod(); // invocation inside private methods do not count + } + + private void secondPrivateMethod() { + } +} diff --git a/test/metrics/RFC/PublicMethodsInvokePublicMethods.java b/test/metrics/RFC/PublicMethodsInvokePublicMethods.java new file mode 100644 index 00000000..39e4c52a --- /dev/null +++ b/test/metrics/RFC/PublicMethodsInvokePublicMethods.java @@ -0,0 +1,10 @@ +// Total RFC = 3 + +public class FirstClass { + public void firstPublicMethod() { // +1 for public method + secondPublicMethod(); // +1 for invocation + } + + public void secondPublicMethod() { // +1 for public method + } +} diff --git a/test/metrics/RFC/SeveralClasses.java b/test/metrics/RFC/SeveralClasses.java new file mode 100644 index 00000000..b407f37f --- /dev/null +++ b/test/metrics/RFC/SeveralClasses.java @@ -0,0 +1,11 @@ +// Total RFC = 2 + +public class FirstClass { // class RFC = 1 + public void firstEmptyPublicMethod() { // +1 for public method + } +} + +public class SecondClass { // class RFC = 1 + public void firstEmptyPublicMethod() { // +1 for public method + } +} diff --git a/test/metrics/RFC/SeveralInvocationOfSameMethod.java b/test/metrics/RFC/SeveralInvocationOfSameMethod.java new file mode 100644 index 00000000..828eb1f3 --- /dev/null +++ b/test/metrics/RFC/SeveralInvocationOfSameMethod.java @@ -0,0 +1,16 @@ +// Total RFC = 4 + +public class FirstClass { + public void firstPublicMethod() { // +1 for public method + System.out.println("Hello World!"); // +1 for invocation + } + + public void secondPublicMethod() { // +1 for public method + System.out.println("Goodbye World!"); // println was already invoked on line 5 + firstPrivateMethod(); // +1 for invocation + firstPrivateMethod(); // 
firstPrivateMethod was already invoked on line 10 + } + + private void firstPrivateMethod() { + } +} diff --git a/test/metrics/RFC/test_rfc.py b/test/metrics/RFC/test_rfc.py new file mode 100644 index 00000000..fbf40541 --- /dev/null +++ b/test/metrics/RFC/test_rfc.py @@ -0,0 +1,85 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from unittest import TestCase +from pathlib import Path + +from aibolit.metrics.RFC.rfc import RFC +from aibolit.ast_framework import AST +from aibolit.utils.ast_builder import build_ast + + +class RFCTestSuite(TestCase): + def test_empty_public_methods(self): + ast = RFCTestSuite._get_ast("EmptyPublicMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 2) + + def test_empty_public_and_private_methods(self): + ast = RFCTestSuite._get_ast("EmptyPublicAndPrivateMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 2) + + def test_public_methods_invoke_public_methods(self): + ast = RFCTestSuite._get_ast("PublicMethodsInvokePublicMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 3) + + def test_public_methods_invoke_private_methods(self): + ast = RFCTestSuite._get_ast("PublicMethodsInvokePrivateMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 3) + + def test_public_methods_invoke_outer_methods(self): + ast = RFCTestSuite._get_ast("PublicMethodsInvokeOuterMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 3) + + def test_several_invocation_of_same_method(self): + ast = RFCTestSuite._get_ast("SeveralInvocationOfSameMethod.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 4) + + def test_invocation_of_local_and_outer_methods_with_same_name(self): + ast = RFCTestSuite._get_ast("InvocationOfLocalAndOuterMethodsWithSameName.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 4) + + def test_several_classes(self): + ast = RFCTestSuite._get_ast("SeveralClasses.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 2) + + def test_inherited_methods(self): + ast = RFCTestSuite._get_ast("InheritedMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 2) + + def test_overwritten_inherited_methods(self): + ast = RFCTestSuite._get_ast("OverwrittenInheritedMethods.java") + rfc = RFC() + self.assertEqual(rfc.value(ast), 2) + + @staticmethod + def _get_ast(filename: str) -> AST: + path = Path(__file__).absolute().parent / filename + return AST.build_from_javalang(build_ast(str(path))) diff --git a/test/metrics/fanout/1.java 
b/test/metrics/fanout/1.java deleted file mode 100644 index 4ae6d4b2..00000000 --- a/test/metrics/fanout/1.java +++ /dev/null @@ -1,7 +0,0 @@ -class InputClassComplexity { - Set set = new HashSet(); - Map map = new HashMap(); - Date date = new Date(); - Time time = new Time(); - Place place = new Place(); -} diff --git a/test/metrics/fanout/2.java b/test/metrics/fanout/2.java deleted file mode 100644 index c80d4c99..00000000 --- a/test/metrics/fanout/2.java +++ /dev/null @@ -1,9 +0,0 @@ -public class Class1 { - public void loginAs(String username, String password) - { - Class2 class2 = new Class2(); - class22.invokeSomeMethod(); - //your actual code - } -} - diff --git a/test/metrics/fanout/3.java b/test/metrics/fanout/3.java deleted file mode 100644 index 05beccd9..00000000 --- a/test/metrics/fanout/3.java +++ /dev/null @@ -1,9 +0,0 @@ -class bbb{ - aaa obj=new aaa(); - bbb(){//constructor - System.out.println(obj.value); - } - public static void main(String args[]){ - bbb obj2=new bbb(); - } - } \ No newline at end of file diff --git a/test/metrics/fanout/4.java b/test/metrics/fanout/4.java deleted file mode 100644 index 018784a5..00000000 --- a/test/metrics/fanout/4.java +++ /dev/null @@ -1,22 +0,0 @@ -import java.util.ArrayDeque; - -public class Program{ - - public static void main(String[] args) { - - ArrayDeque states = new ArrayDeque(); - // стандартное добавление элементов - states.add("Germany"); - states.addFirst("France"); // добавляем элемент в самое начало - states.push("Great Britain"); // добавляем элемент в самое начало - states.addLast("Spain"); // добавляем элемент в конец коллекции - states.add("Italy"); - - // получаем первый элемент без удаления - String sFirst = states.getFirst(); - System.out.println(sFirst); // Great Britain - // получаем последний элемент без удаления - String sLast = states.getLast(); - System.out.println(sLast); // Italy - } -} \ No newline at end of file diff --git a/test/metrics/fanout/5.java b/test/metrics/fanout/5.java deleted file mode 100644 index 274cb824..00000000 --- a/test/metrics/fanout/5.java +++ /dev/null @@ -1,32 +0,0 @@ -public class Main { - - public static void main(String[] args) { - - MonitoringSystem generalModule = new MonitoringSystem() { - @Override - public void startMonitoring() { - System.out.println("Мониторинг общих показателей стартовал!"); - } - }; - - - - MonitoringSystem errorModule = new MonitoringSystem() { - @Override - public void startMonitoring() { - System.out.println("Мониторинг отслеживания ошибок стартовал!"); - } - }; - - MonitoringSystem securityModule = new MonitoringSystem() { - @Override - public void startMonitoring() { - System.out.println("Мониторинг безопасности стартовал!"); - } - }; - - generalModule.startMonitoring(); - errorModule.startMonitoring(); - securityModule.startMonitoring(); - } - } \ No newline at end of file diff --git a/test/metrics/fanout/ClassReferencedFromPackage.java b/test/metrics/fanout/ClassReferencedFromPackage.java new file mode 100644 index 00000000..df66e624 --- /dev/null +++ b/test/metrics/fanout/ClassReferencedFromPackage.java @@ -0,0 +1,8 @@ +// Total FanOut = 2 + +import package1; + +public class FirstClass { + package1.Class1 field1; // +1 for Class1 + package1.Class2 field2; // +1 for Class2 +} diff --git a/test/metrics/fanout/CountTypesInFields.java b/test/metrics/fanout/CountTypesInFields.java new file mode 100644 index 00000000..106e6865 --- /dev/null +++ b/test/metrics/fanout/CountTypesInFields.java @@ -0,0 +1,9 @@ +// Total FanOut = 3 + +public class 
FirstClass { + Set set = new HashSet(); // Set is excluded from consideration + Map map = new HashMap(); // Map is excluded from consideration + Date date = new Date(); // +1 for Date + Time time = new Time(); // +1 for Time + Place place = new Place(); // +1 for Place +} diff --git a/test/metrics/fanout/ExtendingType.java b/test/metrics/fanout/ExtendingType.java new file mode 100644 index 00000000..03061098 --- /dev/null +++ b/test/metrics/fanout/ExtendingType.java @@ -0,0 +1,6 @@ +// Total FanOut = 1 + +import BaseClass; + +public class DerivedClass extends BaseClass { // +1 for BaseClass +}; diff --git a/test/metrics/fanout/GenericAndPackagedTypes.java b/test/metrics/fanout/GenericAndPackagedTypes.java new file mode 100644 index 00000000..ec78712f --- /dev/null +++ b/test/metrics/fanout/GenericAndPackagedTypes.java @@ -0,0 +1,8 @@ +// Total FanOut = 2 + +import SomePackage; +import AnotherPackage.GenericType; + +public class FirstClass { + Map<SomeClass, GenericType> field; // +2 for SomeClass and GenericType +} \ No newline at end of file diff --git a/test/metrics/fanout/GenericType.java b/test/metrics/fanout/GenericType.java new file mode 100644 index 00000000..f001f208 --- /dev/null +++ b/test/metrics/fanout/GenericType.java @@ -0,0 +1,5 @@ +// Total FanOut = 1 + +public class FirstClass { + Set<SomeType> set; // +1 for SomeType, Set is not considered +} \ No newline at end of file diff --git a/test/metrics/fanout/SelfUsage.java b/test/metrics/fanout/SelfUsage.java new file mode 100644 index 00000000..3ee5a96d --- /dev/null +++ b/test/metrics/fanout/SelfUsage.java @@ -0,0 +1,7 @@ +// Total FanOut = 0 + +public class FirstClass{ + public static void main(String args[]){ + FirstClass object=new FirstClass(); // Using itself is not considered + } +} diff --git a/test/metrics/fanout/TypeUsedSeveralTimes.java b/test/metrics/fanout/TypeUsedSeveralTimes.java new file mode 100644 index 00000000..d31c6783 --- /dev/null +++ b/test/metrics/fanout/TypeUsedSeveralTimes.java @@ -0,0 +1,14 @@ +// Total FanOut = 1 + +public class FirstClass { + public void firstMethod() { + UsedClass object = new UsedClass(); // +1 for UsedClass + object.doSomething(); + } + + public void secondMethod() { + UsedClass object = new UsedClass(); // UsedClass was mentioned earlier on line 5 + object.doSomethingElse(); + } +} + diff --git a/test/metrics/fanout/test_fan_out.py b/test/metrics/fanout/test_fan_out.py new file mode 100644 index 00000000..2323e1aa --- /dev/null +++ b/test/metrics/fanout/test_fan_out.py @@ -0,0 +1,71 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from pathlib import Path + +from unittest import TestCase + +from aibolit.metrics.fanout.FanOut import FanOut +from aibolit.ast_framework import AST +from aibolit.utils.ast_builder import build_ast + + +class FanOutTestSuite(TestCase): + def test_count_types_in_fields(self): + ast = FanOutTestSuite._build_ast('CountTypesInFields.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 3) + + def test_type_used_several_times(self): + ast = FanOutTestSuite._build_ast('TypeUsedSeveralTimes.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 1) + + def test_self_usage(self): + ast = FanOutTestSuite._build_ast('SelfUsage.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 0) + + def test_class_referenced_from_package(self): + ast = FanOutTestSuite._build_ast('ClassReferencedFromPackage.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 2) + + def test_extending_type(self): + ast = FanOutTestSuite._build_ast('ExtendingType.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 1) + + def test_generic_type(self): + ast = FanOutTestSuite._build_ast('GenericType.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 1) + + def test_generic_and_packaged_types(self): + ast = FanOutTestSuite._build_ast('GenericAndPackagedTypes.java') + fan_out = FanOut() + self.assertEqual(fan_out.value(ast), 2) + + @staticmethod + def _build_ast(filename: str) -> AST: + path = Path(__file__).absolute().parent / filename + return AST.build_from_javalang(build_ast(str(path))) diff --git a/test/metrics/ncss/BasicExample.java b/test/metrics/ncss/BasicExample.java index 7784c44c..1d1664bc 100644 --- a/test/metrics/ncss/BasicExample.java +++ b/test/metrics/ncss/BasicExample.java @@ -1,4 +1,4 @@ -// Total NCSS = 12 +// Total NCSS = 13 import java.util.Collections; import java.io.IOException; @@ -11,7 +11,7 @@ public void bigMethod() // +1 boolean a = false, b = true; // +1 if (a || b) { // +1 - try { + try { // +1 do { // +1 x += 2; // +1 } while (x < 12); diff --git a/test/metrics/ncss/FinallyBlock.java b/test/metrics/ncss/FinallyBlock.java index 9e9724dc..f74f1c14 100644 --- a/test/metrics/ncss/FinallyBlock.java +++ b/test/metrics/ncss/FinallyBlock.java @@ -1,10 +1,10 @@ -// Total NCSS = 6 +// Total NCSS = 7 class FinallyBlock { // +1 private int x = 0; // +1 public void tryIncrement() { // +1 - try { + try { // +1 // Increment will never raise exception, // but it can be replaced with something // more dangerous. 
diff --git a/test/metrics/ncss/SimpleExample.java b/test/metrics/ncss/SimpleExample.java index 197467ad..64328278 100644 --- a/test/metrics/ncss/SimpleExample.java +++ b/test/metrics/ncss/SimpleExample.java @@ -1,4 +1,4 @@ -// Total NCSS = 17 +// Total NCSS = 18 import java.util.Collections; import java.io.IOException; @@ -13,7 +13,7 @@ public void bigMethod() // +1 b = true; // +1 x = 0; // +1 if (a || b) { // +1 - try { + try { // +1 do { // +1 x += 2; // +1 } while (x < 12); diff --git a/test/metrics/ncss/SimpleExample2.java b/test/metrics/ncss/SimpleExample2.java index e7c09704..41e59576 100644 --- a/test/metrics/ncss/SimpleExample2.java +++ b/test/metrics/ncss/SimpleExample2.java @@ -1,4 +1,4 @@ -// Total NCSS = 18 +// Total NCSS = 19 import java.util.Collections; import java.io.IOException; @@ -12,7 +12,7 @@ public void bigMethod() // +1 b = true; // +1 x = 0; // +1 if (a || b) { // +1 - try { + try { // +1 int i, j; // +1 for(i = 0, j = 10; i < j; i++) { // +1 x += 2; // +1 diff --git a/test/metrics/ncss/test_all_types.py b/test/metrics/ncss/test_all_types.py index 2fd1509d..60386da1 100644 --- a/test/metrics/ncss/test_all_types.py +++ b/test/metrics/ncss/test_all_types.py @@ -41,19 +41,19 @@ def testBasicExample(self): file = 'test/metrics/ncss/BasicExample.java' metric = NCSSMetric() res = metric.value(file) - self.assertEqual(res, 12) + self.assertEqual(res, 13) def testSimpleExample(self): file = 'test/metrics/ncss/SimpleExample.java' metric = NCSSMetric() res = metric.value(file) - self.assertEqual(res, 17) + self.assertEqual(res, 18) def testSimpleExample2(self): file = 'test/metrics/ncss/SimpleExample2.java' metric = NCSSMetric() res = metric.value(file) - self.assertEqual(res, 18) + self.assertEqual(res, 19) def testChainedIfElse(self): file = 'test/metrics/ncss/ChainedIfElse.java' @@ -71,4 +71,4 @@ def testFinallyBlock(self): file = 'test/metrics/ncss/FinallyBlock.java' metric = NCSSMetric() res = metric.value(file) - self.assertEqual(res, 6) + self.assertEqual(res, 7) diff --git a/test/model/test_model.py b/test/model/test_model.py deleted file mode 100644 index 30dac771..00000000 --- a/test/model/test_model.py +++ /dev/null @@ -1,37 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2020 Aibolit -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import numpy as np -from unittest import TestCase -from aibolit.model.model import TwoFoldRankingModel - - -class TestModel(TestCase): - - def test_get_minimum(self): - ranking_model = TwoFoldRankingModel() - c1 = np.array([1, 4, 5, 3, 6, 6, 4, 3, 1]) - c2 = np.array([1, 2, 7, 3, 8, 4, 5, 3, -1]) - c3 = np.array([1, 4, 5, 7, 6, 3, 0, -3, 1]) - c, number = ranking_model.get_minimum(c1, c2, c3) - np.testing.assert_array_equal(c, np.array([1, 2, 5, 3, 6, 3, 0, -3, -1])) - np.testing.assert_array_equal(number, np.array([0, 1, 0, 0, 0, 2, 2, 2, 1])) diff --git a/test/patterns/multiply_try/test_multiple_try.py b/test/patterns/multiply_try/test_multiple_try.py index 061d0f9a..bb411033 100644 --- a/test/patterns/multiply_try/test_multiple_try.py +++ b/test/patterns/multiply_try/test_multiple_try.py @@ -36,7 +36,7 @@ def test_simple(self): def test_large_file(self): lines = self.method_chain_finder.value(Path(self.dir_path, 'Large.java')) - self.assertEqual(lines, [706, 620]) + self.assertEqual(lines, [620, 706]) def test_try_inside_anonymous(self): lines = self.method_chain_finder.value(Path(self.dir_path, 'TryInsideAnomymous.java')) diff --git a/test/patterns/send_null/test_send_null.py b/test/patterns/send_null/test_send_null.py index 4a9fa602..dd592cd8 100644 --- a/test/patterns/send_null/test_send_null.py +++ b/test/patterns/send_null/test_send_null.py @@ -14,7 +14,7 @@ def test_one_send(self): def test_multi_level_invocation(self): lines = self.method_send_null_finder.value(Path(self.dir_path, 'Configuration.java')) - self.assertEqual(lines, [379, 442, 549, 638, 656, 830, 866, 1362, 2393, 2988, 3080, 3492, 3855]) + self.assertEqual(lines, [379, 442, 549, 638, 656, 830, 866, 1362, 2393, 2874, 2988, 3080, 3492, 3758, 3855]) def test_no_null_methods(self): lines = self.method_send_null_finder.value(Path(self.dir_path, 'FillContent.java')) @@ -26,7 +26,7 @@ def test_simple_invocation(self): def test_more_method_invocations(self): lines = self.method_send_null_finder.value(Path(self.dir_path, 'SequenceFile.java')) - self.assertEqual(lines, [1186, 1201, 1217, 3285, 3298, 3367, 3537, 3550]) + self.assertEqual(lines, [1097, 1186, 1201, 1217, 3285, 3298, 3367, 3537, 3550]) def test_constructor_send_null(self): lines = self.method_send_null_finder.value(Path(self.dir_path, 'Constructor.java')) diff --git a/test/patterns/string_concat/test_concat_string.py b/test/patterns/string_concat/test_concat_string.py index f1283448..2116721a 100644 --- a/test/patterns/string_concat/test_concat_string.py +++ b/test/patterns/string_concat/test_concat_string.py @@ -63,7 +63,7 @@ def test_string_with_quotes(self): self.assertEqual(lines, [ 352, 372, 373, 379, 380, 408, 409, 418, 422, 429, 430, 438, 456, 466, 479, 494, 499, 509, 562, 563, 604, 605, 610, 701, 995, 1009, 1012, 1029, - 1032, 1080, 1258, 1267, 1337, 1386, 1414 + 1032, 1080, 1188, 1258, 1267, 1337, 1366, 1372, 1386, 1414 ]) def test_comment_inside_line(self): diff --git a/test/patterns/this_finder/double_this.java b/test/patterns/this_finder/double_this.java deleted file mode 100644 index f5b8a512..00000000 --- a/test/patterns/this_finder/double_this.java +++ /dev/null @@ -1,24 +0,0 @@ -class Temp extends Base -{ - private int i; - - Temp(int x) - { - this.i = x; - } - - - // constructor with one arguemnt. - Temp(int x, int z, int u, int y) - { - this(y); - this(z); - } - - public static void main(String[] args) - { - // Object creation by calling no-argument - // constructor. 
- new Temp(5, 6, 7, 8); - } -} \ No newline at end of file diff --git a/test/patterns/this_finder/test_find_this.py b/test/patterns/this_finder/test_find_this.py index 6ba95bf5..c6dd3ea0 100644 --- a/test/patterns/this_finder/test_find_this.py +++ b/test/patterns/this_finder/test_find_this.py @@ -22,21 +22,17 @@ import os from unittest import TestCase -from aibolit.patterns.this_finder.this_finder import ThisFinder +from aibolit.patterns.hybrid_constructor.hybrid_constructor import HybridConstructor -class TestFindThis(TestCase): +class TestHybridConstructor(TestCase): cur_dir = os.path.dirname(os.path.realpath(__file__)) - pattern = ThisFinder() + pattern = HybridConstructor() def test_several(self): lines = self.pattern.value(self.cur_dir + '/several.java') self.assertEqual(lines, [4, 10, 20]) - def test_simple1(self): - lines = self.pattern.value(self.cur_dir + '/double_this.java') - self.assertEqual(lines, [12]) - def test_simple2(self): lines = self.pattern.value(self.cur_dir + '/init_block.java') self.assertEqual(lines, []) @@ -49,10 +45,6 @@ def test_simple3(self): lines = self.pattern.value(self.cur_dir + '/autocloseable.java') self.assertEqual(lines, [4, 14, 31]) - def test_simple4(self): - lines = self.pattern.value(self.cur_dir + '/one_line_this.java') - self.assertEqual(lines, [11]) - def test_simple5(self): lines = self.pattern.value(self.cur_dir + '/one_line_usage.java') self.assertEqual(lines, [12]) diff --git a/test/recommend/test_recommend_pipeline.py b/test/recommend/test_recommend_pipeline.py index b3cf1b93..4ee344e9 100644 --- a/test/recommend/test_recommend_pipeline.py +++ b/test/recommend/test_recommend_pipeline.py @@ -42,6 +42,7 @@ def __init__(self, *args, **kwargs): self.config = Config.get_patterns_config() def __create_mock_input(self): + ex = Exception("Error occurred") item = { 'filename': '1.java', 'ncss': 100, @@ -70,7 +71,7 @@ def __create_mock_input(self): ] } error_file = { - 'error_string': "Error occured", + 'exception': str(ex), 'filename': 'hdd/home/Error.java', 'results': [] } @@ -88,6 +89,7 @@ def __suppress_argparse_mock(self): return argparse_mock def __create_input_for_xml(self): + ex = Exception("Smth happened") return [ {'filename': 'D:\\target\\0001\\fast\\Configuration.java', 'ncss': 100, @@ -104,7 +106,7 @@ def __create_input_for_xml(self): {'filename': 'D:\\target\\0001\\fast\\Error.java', 'results': [], 'ncss': 0, - 'error_string': "Smth happened" + 'exception': str(ex) }, {'filename': 'D:\\target\\0001\\fast\\Another.java', 'ncss': 50, @@ -132,7 +134,7 @@ def __create_mock_cmd(self): def test_calculate_patterns_and_metrics(self): args = self.__suppress_argparse_mock() file = Path(self.cur_file_dir, 'folder/LottieImageAsset.java') - input_params, code_lines_dict, error_string = calculate_patterns_and_metrics(file, args) + input_params, code_lines_dict, _ = calculate_patterns_and_metrics(file, args) val = code_lines_dict['P2'] self.assertNotEqual(val, 0) val = code_lines_dict['P24'] @@ -142,7 +144,7 @@ def test_calculate_patterns_and_metrics_wih_suppress(self): args = self.__suppress_argparse_mock() args.suppress = 'P2' file = Path(self.cur_file_dir, 'folder/LottieImageAsset.java') - input_params, code_lines_dict, error_string = calculate_patterns_and_metrics(file, args) + input_params, code_lines_dict, _ = calculate_patterns_and_metrics(file, args) val = code_lines_dict['P2'] self.assertEqual(val, 0) val = code_lines_dict['P24'] @@ -175,7 +177,7 @@ def test_text_format(self): new_mock = format_converter_for_pattern(mock_input) text = 
create_text(new_mock, full_report=True) md5_hash = md5('\n'.join(text).encode('utf-8')) - self.assertEqual(md5_hash.hexdigest(), '5efdc7930a66874e9d0c7dcfef734687') + self.assertEqual(md5_hash.hexdigest(), '2a67e22091ba2cfd76847d292ed90142') def test_empty_lines_format(self): new_mock = format_converter_for_pattern([]) @@ -188,7 +190,7 @@ def test_text_format_sort_by_code_line(self): new_mock = format_converter_for_pattern(mock_input, 'code_line') text = create_text(new_mock, full_report=True) md5_hash = md5('\n'.join(text).encode('utf-8')) - self.assertEqual(md5_hash.hexdigest(), '1324e129e6badbfb6e10f742667023ae') + self.assertEqual(md5_hash.hexdigest(), '8c6b91b8600848b66a9e91f2047d2d6d') def test_find_start_end_line_function(self): # Check start and end line for MethodDeclaration, diff --git a/wp/sections/implementation.tex b/test/stats/__init__.py similarity index 100% rename from wp/sections/implementation.tex rename to test/stats/__init__.py diff --git a/test/stats/model.pkl b/test/stats/model.pkl new file mode 100644 index 00000000..ef0b075e Binary files /dev/null and b/test/stats/model.pkl differ diff --git a/test/stats/results_test.csv b/test/stats/results_test.csv new file mode 100644 index 00000000..6070e09a --- /dev/null +++ b/test/stats/results_test.csv @@ -0,0 +1,35 @@ +,pattern, -1(top1),+1(top1),p-c-,p+c+,p-c+,p+c-,p-c=,p+c= +0,Asserts,2,6,2,2,7,7,0,0 +1,Setters,0,0,2,2,8,8,0,0 +2,Empty Rethrow,0,0,2,2,8,8,0,0 +3,Prohibited class name,0,0,2,2,8,8,0,0 +4,Force Type Casting,0,0,2,2,8,8,0,0 +5,Count If Return,0,0,2,2,8,8,0,0 +6,Implements Multi,0,0,2,2,8,8,0,0 +7,Instance of,0,0,2,2,8,8,0,0 +8,Many primary constructors,0,0,2,2,8,8,0,0 +9,Method chain,0,0,2,2,8,8,0,0 +10,Multiple try,0,0,2,2,8,8,0,0 +11,Non final attribute,0,0,2,2,8,8,0,0 +12,Null check,0,0,2,2,8,8,0,0 +13,Partial synchronized,0,0,2,2,8,8,0,0 +14,Redundant catch,0,0,2,2,8,8,0,0 +15,Return null,0,0,2,2,8,8,0,0 +16,String concat,0,1,2,2,8,8,0,0 +17,Super Method,0,0,2,2,8,8,0,0 +18,This in constructor,0,0,2,2,8,8,0,0 +19,Var declaration distance for 5 lines,0,0,2,2,8,8,0,0 +20,Var declaration distance for 7 lines,0,0,2,2,8,8,0,0 +21,Var declaration distance for 11 lines,0,0,2,2,8,8,0,0 +22,Var in the middle,0,0,2,2,8,8,0,0 +23,Array as function argument,0,0,2,2,8,8,0,0 +24,Joined validation,0,0,2,2,8,8,0,0 +25,Non final class,0,0,2,2,8,8,0,0 +26,Private static method,0,0,2,2,8,8,0,0 +27,Public static method,0,0,2,2,8,8,0,0 +28,Var siblings,0,0,2,2,8,8,0,0 +29,Null Assignment,0,0,2,2,8,8,0,0 +30,Multiple While,0,1,2,2,8,8,0,0 +31,Protected Method,0,0,2,2,8,8,0,0 +32,Send Null,0,0,2,2,8,8,0,0 +33,Nested Loop,0,0,2,2,8,8,0,0 diff --git a/test/stats/test_dataset.csv b/test/stats/test_dataset.csv new file mode 100644 index 00000000..1e2097aa --- /dev/null +++ b/test/stats/test_dataset.csv @@ -0,0 +1,141 @@ +,filename,cyclo,cyclo_method_avg,cyclo_method_min,cyclo_method_max,npath_method_avg,npath_method_min,npath_method_max,ncss,ncss_method_avg,ncss_method_min,ncss_method_max,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15,P16,P17,P18,P19,P20_5,P20_7,P20_11,P21,P22,P23,P24,P25,P26,P27,P28,P29,P30,P31,P32,M1,M2,M3_1,M3_2,M3_3,M3_4,M4,M5,M6,M7,lines_P1,lines_P2,lines_P3,lines_P4,lines_P5,lines_P6,lines_P7,lines_P8,lines_P9,lines_P10,lines_P11,lines_P12,lines_P13,lines_P14,lines_P15,lines_P16,lines_P17,lines_P18,lines_P19,lines_P20_5,lines_P20_7,lines_P20_11,lines_P21,lines_P22,lines_P23,lines_P24,lines_P25,lines_P26,lines_P28,lines_P29,lines_P30,lines_P31,lines_P32,halstead volume 
+2,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/impl/SerializeProcess.java,10,5,3,7,6,2,10,54,25.5,6,45,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,8,0,0,5,5,4,6,0,0,1,0,0,0,0,0,0,0,1,3.939859752,42,31471.42871,5.752808989,1138,12,13,1,23,27,[],[],[],[],[],[],[],[],[],"[32, 37, 103]",[],[],[],[],[],[],"[30, 75, 76, 78, 79, 88, 90, 91]",[],[],"[42, 88, 90, 85, 75]","[42, 88, 90, 85, 75]","[88, 90, 85, 75]","[35, 40, 65, 70, 71, 72]",[],[],[20],[],[],[],[],[],[],[66],2992.063285 +4,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/misc/LoadRegionProcessor.java,17,5.666666667,1,13,133.3333333,1,396,60,19.66666667,8,42,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,0,0,2,2,1,2,0,0,1,1,0,0,0,0,0,0,5,3.990436554,62,2297.409524,28.95238095,167,24,38,1,23,22,[],[],[],[],[],[],[],[],[],"[33, 59, 69]",[],[],[],[],[],[],"[44, 55, 105]",[],[],"[83, 37]","[83, 37]",[83],"[58, 67]",[],[],[16],[88],[],[],[],[],[],"[39, 59, 63, 68, 74]",2450.806367 +12,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/util/Smethod.java,15,1.153846154,1,3,1.153846154,1,3,31,2.307692308,2,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.614140532,32,1232.306667,9.6,109,4,3,8,13,9,[],[],[],[],[29],[],[],[],[],[],[],[],[],[],[],[],[65],[],[],[],[],[],[],[],[],[11],[],[],[],[],[],[],[],753.9018111 +16,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/WaveSpawner.java,23,2.3,1,6,3.7,1,10,60,4.75,1,13,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,2,2,0,0,2,3.990057131,79,1615.439967,8.727272727,186,4,19,4,25,32,[],[],[],[],[],[],[],[],[],[90],[],"[20, 21]",[],[],[],[],[],[],[],[],[],[],[64],[],[],[17],[],[],[],[],[],[],"[50, 61]",3333.448296 +17,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/formations/DistanceAssignmentStrategy.java,8,1.6,1,4,1.6,1,4,25,4.4,2,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,1,3.783169581,25,1021.358333,7.384615385,85,4,6,4,16,16,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[27],[],[],[],[],[],[7],[],[],[],[],[],[],[23],659.0255751 +23,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/types/FlyingAI.java,23,4.6,1,11,44,1,200,54,10.6,2,21,0,0,0,0,0,0,0,0,0,6,0,0,5,0,0,0,0,0,0,0,0,0,4,0,0,1,0,0,0,0,0,0,0,0,3.585753985,58,1044.712833,7.661971831,82,4,26,3,16,14,[],[],[],[],[],[],[],[],[],"[15, 56, 70, 85, 91, 93]",[],[],"[29, 30, 35, 54, 68]",[],[],[],[],[],[],[],[],[],"[33, 72, 87, 88]",[],[],[10],[],[],[23],[],"[49, 53, 67, 84]",[],[],1635.894046 +25,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/types/GroundAI.java,23,7.666666667,3,10,89.33333333,4,144,52,17,8,22,0,0,0,0,1,0,0,1,0,1,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,1,0,0,0,0,0,0,0,0,3.624199757,55,1036.235412,10.05714286,91,12,31,2,18,16,[],[],[],[],[23],[],[],[22],[],[52],[],[],"[33, 61, 78, 85, 88, 92]",[],[],[],[],[],[],[],[],[],"[31, 45, 62, 87, 89, 90]",[],[],[13],[],[],"[19, 70]",[],"[59, 69]",[],[],1526.977609 +27,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/async/TeamIndexProcess.java,10,2.5,1,6,5.5,1,18,24,5.25,2,12,0,0,0,0,0,0,0,0,0,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.670000011,29,555.9942308,8.205128205,76,4,8,2,16,7,[],[],[],[],[],[],[],[],[],[42],[],"[12, 13]","[16, 36]",[],[],[],[],[],[],[],[],[],[],[],[],[11],[],[],[],[],[],[],[],572.1226517 
+66,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/bullet/ArtilleryBulletType.java,5,1.25,1,2,1.25,1,2,24,5.5,2,9,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.652947588,22,1046.821321,5.333333333,80,4,1,3,13,22,[],[],[],[],[],[],[],[],[],[],[],[10],[],[],[],[],[],[27],[],[],[],[],[],[],[],[9],[],[],[],[],[],[],[],860.728309 +70,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/bullet/HealBulletType.java,7,1.4,1,3,1.4,1,3,27,4.6,2,7,0,0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.710775234,28,590.6104651,5.209302326,57,4,7,3,12,18,[],[],[],[],[],[],[],[48],[],[],[],"[11, 12, 13]",[],[],[],[],[],[46],[],[],[],[],[],[],[],[10],[],[],[],[],[],[],[],943.9985252 +71,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/bullet/LaserBulletType.java,7,1.4,1,3,1.4,1,3,46,7.8,2,23,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,3.770970683,44,1228.475942,4.063492063,111,4,3,1,15,31,[],[],[],[],[],[],[],[],[],[],[],"[13, 14, 15, 16, 17, 18]",[],[],[],[],[],[],[],"[58, 65]",[65],[],[],[],[],[12],[],[],[],[],[],[],[64],1858.847147 +76,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/units/AIController.java,10,1.428571429,1,2,1.428571429,1,2,28,2.857142857,1,4,0,0,0,1,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.663448777,25,2233.004625,10.43478261,193,12,3,5,13,12,[],[],[],[13],[],[],[],[],[],[],[],"[17, 18, 19]","[27, 32, 42]",[],[],[],[],[],[],[],[],[],[],[],[],[13],[],[],[],[],"[25, 30, 35, 39, 47]",[],[],915.3999076 +86,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/game/SoundLoop.java,9,3,1,6,4.333333333,1,10,29,8,3,16,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.590065993,34,1158.554878,16,90,16,13,2,22,8,[],[],[],[],[],[],[],[],[],[],[],"[12, 13]",[],[],[],[],[],[],[],[],[],[],[],[],[],[8],[],[],[],[],[],[],[],629.7513016 +98,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/graphics/MultiPacker.java,9,1.5,1,2,1.5,1,2,23,3,2,5,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.831839087,31,1182.61945,30.51162791,79,24,3,2,10,13,[],[],[],[],[],[],[],[],[],[],[],[10],[20],[],[],[],[],[],[],[],[],[],[],[],[],[9],[],[],[],[],[],[],[],687.9818059 +103,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/graphics/Trail.java,4,2,2,2,2,2,2,21,8.5,8,9,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,3.774572876,19,1164.422348,5,112,4,2,2,14,21,[],[],[],[],[],[],[],[],[],[38],[],"[12, 13]",[],[],[],[],[],[],[],[],[],[],[36],[],[],[10],[],[],[],[],[],[],[],1137.251495 +139,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/mod/ModLoadingSound.java,25,1,1,1,1,1,1,52,2,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.422279822,52,446.7001558,7.775700935,48,4,25,12,8,9,[],[],[],[],[],[],[],[],[],[],[],[9],[],[],[],[],[],[],[],[],[],[],[],[],[],[8],[],[],[],[],[],[],[],1005.84441 +143,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/net/Interpolator.java,11,3.666666667,1,6,6.333333333,1,10,40,10.66666667,8,13,0,0,0,0,0,0,0,0,0,1,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.65956103,46,465.5395928,8.784313725,56,8,14,2,16,17,[],[],[],[],[],[],[],[],[],[50],[],"[9, 10, 11, 12, 13, 16, 17]",[],[],[],[],[],[],[],[],[],[],[],[],[],[7],[],[],[],[],[],[],[],933.1709271 
+157,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/type/Weapon.java,3,1,1,1,1,1,1,30,2,2,2,0,0,0,0,0,0,0,0,0,0,0,23,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.621808999,30,876.5333333,1.969230769,94,4,0,22,9,27,[],[],[],[],[],[],[],[],[],[],[],"[14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58]",[],[],[],[],[69],[],[],[],[],[],[],[],[],[12],[],[],[],[],[],[],[],733.996792 +159,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/BorderImage.java,6,1,1,1,1,1,1,24,3.5,1,9,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,3.599816366,22,535.6614509,5.189189189,65,4,0,1,9,16,[],[],[],[],[],[],[],[],[],[],[],"[10, 11]",[],[],[],[],[],[38],[],[],[],[],"[40, 41]",[],[],[9],[],[],[],[],[],[],[],513.6912619 +160,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/ContentDisplay.java,12,3,2,5,5.5,2,14,79,19.5,16,22,0,0,0,0,8,0,0,0,0,27,0,0,4,0,0,0,6,0,0,0,0,0,3,0,0,1,0,4,4,4,4,0,0,2,3.394313563,98,2527.714617,4.64957265,126,4,10,4,21,24,[],[],[],[],"[90, 92, 94, 121, 123, 125, 127, 129]",[],[],[],[],"[20, 21, 26, 31, 34, 45, 49, 51, 55, 67, 68, 73, 78, 81, 85, 101, 102, 107, 112, 115, 119, 135, 136, 141, 146, 149, 153]",[],[],"[30, 77, 111, 145]",[],[],[],"[21, 45, 51, 68, 102, 136]",[],[],[],[],[],"[18, 38, 52]",[],[],[13],[],"[15, 64, 98, 133]",[],[],[],[],"[48, 53]",3895.496796 +192,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/dialogs/ModsDialog.java,10,2,1,4,78671.6,2,393344,37,7.2,2,14,0,0,0,0,0,0,0,0,0,35,1,0,5,0,0,0,4,2,0,3,3,3,5,0,0,1,0,0,0,0,0,0,0,0,3.861373085,165,1798.303302,10.76106195,166,16,6,3,57,59,[],[],[],[],[],[],[],[],[],"[26, 59, 60, 63, 64, 69, 75, 78, 94, 134, 137, 145, 154, 158, 159, 162, 181, 182, 184, 185, 187, 193, 198, 212, 215, 218, 227, 257, 259, 261, 263, 266, 268, 272, 274]",[54],[],"[47, 164, 175, 265, 271]",[],[],[],"[99, 106, 149, 184]","[173, 173]",[],"[132, 139, 129]","[132, 139, 129]","[132, 139, 129]","[66, 67, 70, 72, 147]",[],[],[20],[],[],[],[],[],[],[],6282.136787 +194,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/dialogs/PausedDialog.java,18,4.5,1,8,183.5,3,693,53,12.25,2,34,0,0,0,0,0,0,0,0,0,11,0,3,3,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.794840949,90,2472.623745,8.971962617,207,4,20,2,26,29,[],[],[],[],[],[],[],[],[],"[38, 40, 54, 59, 74, 77, 86, 91, 93, 95, 126]",[],"[10, 11, 12]","[74, 94, 119]",[],[],[],[129],[],[],[],[],[],[],[],[],[9],[],[],[],[],[],[],[],2702.173173 +199,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/dialogs/ZoneInfoDialog.java,5,1.666666667,1,3,5334,1,16000,42,13.33333333,3,33,0,0,0,0,0,0,0,2,0,21,0,1,0,0,0,0,1,0,0,3,3,1,8,0,0,1,0,0,0,0,0,0,0,0,3.771168749,107,1690.619687,10.35294118,162,20,8,2,38,37,[],[],[],[],[],[],[],"[66, 82]",[],"[49, 50, 60, 63, 64, 70, 73, 74, 75, 85, 86, 89, 90, 91, 100, 102, 105, 106, 111, 118, 146]",[],[19],[],[],[],[],[50],[],[],"[167, 54, 166]","[167, 54, 166]",[167],"[36, 37, 38, 66, 82, 116, 129, 152]",[],[],[18],[],[],[],[],[],[],[],3822.4713 +200,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/fragments/BlockConfigFragment.java,6,1,1,1,3.166666667,1,12,26,3.833333333,2,9,0,0,0,0,0,0,0,0,0,0,0,2,4,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.701410568,41,896.3440559,12.8,96,16,6,1,14,9,[],[],[],[],[],[],[],[],[],[],[],"[16, 17]","[39, 58, 64, 74]",[],[],[],[],"[29, 29]",[],[],[],[],[],[],[],[15],[],[],"[32, 
78]",[],[],[],[],1153.852701 +204,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ui/fragments/LoadingFragment.java,7,1,1,1,1,1,1,33,4.142857143,2,9,0,0,0,0,1,0,0,0,0,9,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.552156276,46,1047.432836,4.363636364,76,4,1,3,13,14,[],[],[],[],[42],[],[],[],[],"[24, 25, 27, 29, 47, 52, 53, 61, 65]",[],"[15, 16, 17]",[],[],[],[],[],[],[],[],[],[],[],[],[],[14],[],[],[],[],[],[],[],1573.101324 +217,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/world/Edges.java,15,2.666666667,1,4,2.4,1,3,43,3.8,2,5,0,0,0,0,5,0,0,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,1,0,3,1,0,5,5,5,5,0,0,0,3.900622602,45,1688.699637,20.38596491,98,20,14,5,13,20,[],[],[],[],"[23, 24, 46, 47, 66]",[],[],[],[],[],[],"[13, 14, 15]",[],[],[],[],"[65, 70, 76]",[],[],[],[],[],[58],[],"[64, 70, 76]",[10],[],"[52, 56, 63, 69, 75]",[],[],[],[],[],2158.561239 +231,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/world/blocks/environment/StaticWall.java,8,2,1,5,3,1,5,24,5.25,2,10,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.834636845,23,1213.393235,7.441860465,127,8,16,1,21,16,[],[],[],[],[],[],[],[],[],[],[],"[14, 15]",[],[],[],[],[],[41],[],[],[],[],[],[],[],[13],[],[],[],[],[],[],[],1202.162007 +276,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/tools/src/mindustry/tools/Edgifier.java,23,7.666666667,1,12,21.33333333,1,54,51,16.66666667,4,30,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,7,6,5,3,1,0,1,2,1,1,1,1,0,0,6,3.888543095,51,1062.805464,48,103,20,34,1,21,21,[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[],[],[85],[],[],"[66, 29, 30, 52, 53, 54, 55]","[66, 30, 52, 53, 54, 55]","[66, 52, 53, 54, 55]","[13, 45, 47]",[10],[],[8],"[18, 75]",[10],[],[],[],[],"[23, 28, 33, 34, 63, 64]",1535.076491 +286,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/eng/code-quality-reports/src/main/java/com/azure/tools/checkstyle/checks/FluentMethodNameCheck.java,16,2,1,5,3.25,1,12,39,4.5,2,14,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,3.842457293,44,1380.093508,31.64044944,89,24,13,4,17,20,[],[],[],[],[],[],[],[],[],[66],[],[],[75],[],[],[],[],[],[],[],[],[],"[105, 111]",[],[],[25],[],[],[],[],[],[],[],1496.204601 +287,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/eng/code-quality-reports/src/main/java/com/azure/tools/checkstyle/checks/GoodLoggingCheck.java,26,2.888888889,1,9,6.222222222,1,18,65,6.111111111,2,26,0,0,0,0,0,0,0,0,0,1,0,2,4,0,0,0,1,0,0,0,0,0,6,0,0,1,0,0,0,0,0,0,0,0,4.020125916,77,1308.198911,21.17293233,108,24,24,4,18,34,[],[],[],[],[],[],[],[],[],[97],[],"[42, 44]","[107, 129, 145, 177]",[],[],[],[158],[],[],[],[],[],"[87, 106, 110, 155, 157, 176]",[],[],[30],[],[],[],[],[],[],[],2532.831319 +294,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/eng/code-quality-reports/src/main/java/com/azure/tools/checkstyle/checks/OnlyFinalFieldsForImmutableClassCheck.java,11,1.571428571,1,4,1.571428571,1,4,31,3.857142857,2,11,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.869526056,30,718.4571655,25.18032787,65,24,7,1,16,16,[],[],[],[],[],[],[],[],[],[],[],[25],[75],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],885.2880685 
+311,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/advisor/mgmt-v2017_04_19/src/main/java/com/microsoft/azure/management/advisor/v2017_04_19/implementation/AdvisorManagementClientImpl.java,20,1,1,1,1,1,1,67,2.75,2,11,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.525406548,67,1452.685859,5.508196721,115,4,0,3,11,14,[],[],[],[],[],[],[],[],[],[],[],"[21, 32, 55, 67, 90, 113, 138, 151, 164, 177, 190]",[],[],[],[],[],[250],[],[],[],[],[],[],[],[19],[],[],[],[],[230],[],[],969.9847335 +318,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/advisor/mgmt-v2017_04_19/src/main/java/com/microsoft/azure/management/advisor/v2017_04_19/implementation/MetadataEntityInner.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.529927294,43,998.4315101,5.217391304,100,4,0,7,10,7,[],[],[],[],[],[],[],[],[],[],[],"[26, 32, 38, 44, 50, 56, 62]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],803.6328007 +329,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/advisor/mgmt-v2017_04_19/src/main/java/com/microsoft/azure/management/advisor/v2017_04_19/implementation/ResourceRecommendationBaseImpl.java,17,1,1,1,1,1,1,42,2.235294118,2,5,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,3.500027641,41,1379.775209,8.091954023,155,8,14,17,10,5,[],[],[],[],[],[],[],[],[],[],[],"[25, 26]",[],[],[],[],[],[],[],[],[],[],[],[],[],[24],[],[],[],[],[43],[30],[],1138.701569 +338,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/ApiContractProperties.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.486121033,31,1024.597596,5.5,92,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 37, 43, 49]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],480 +382,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/ApiTagResourceContractProperties.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.424208984,31,908.6913462,5.5,81,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 43, 49]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],456.6502135 +408,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/BackendServiceFabricClusterProperties.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.500261741,31,2255.285096,5.5,122,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 41, 47]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],510.28222 +418,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/CertificateConfiguration.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.356275877,25,1303.388244,5.538461538,90,4,0,4,8,4,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 37, 43]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],385.4360569 
+438,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/EmailTemplateUpdateParameters.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.440958761,31,1240.676165,5.333333333,113,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 42, 48]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],511.8225751 +459,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/IdentityProviderUpdateParameters.java,18,1,1,1,1,1,1,55,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.461369569,55,1371.060717,5.333333333,103,4,0,9,10,9,[],[],[],[],[],[],[],[],[],[],[],"[25, 31, 37, 43, 49, 55, 61, 69, 77]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],888.0090376 +479,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/OperationUpdateContract.java,16,1,1,1,1,1,1,49,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.520423805,49,1070.192842,5.333333333,99,4,0,8,10,8,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 42, 48, 54, 61, 69]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],816.5245612 +495,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/ProductUpdateParameters.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.425306428,43,1043.12809,5.393258427,87,4,0,7,8,7,[],[],[],[],[],[],[],[],[],[],[],"[23, 31, 43, 54, 63, 72, 78]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],658.9788966 +506,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/RequestContract.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.432155744,25,1219.388244,5.538461538,90,4,0,4,10,4,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 40]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],399.4105502 +565,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiContractInner.java,32,1,1,1,1,1,1,97,2.46875,2,3,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.676430879,97,1321.448713,5.126213592,128,4,0,17,10,18,[],[],[],[],[],[],[],[],[],[],[],"[30, 36, 42, 48, 55, 61, 67, 73, 79, 85, 91, 98, 104, 110, 119, 125, 131]",[],[],[],[],[],[],[],[],[],[],[],[],[],[25],[],[],[],[],[],[],[],2008.986433 +566,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiDiagnosticContractImpl.java,25,1.041666667,1,2,1.041666667,1,2,80,3,2,8,0,0,0,0,0,0,1,0,2,0,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.546957283,78,1672.83641,9.647058824,189,8,17,17,11,17,[],[],[],[],[],[],[18],[],"[27, 
35]",[],[],"[20, 21, 22, 23, 24, 25]",[75],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[68],[],[],1918.465584 +577,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiIssuesImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.701503209,27,2443.697203,7.907692308,122,5,4,3,15,9,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1139.786098 +592,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiOperationPolicysImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.659541112,25,2904.658475,7.627118644,129,5,4,3,14,9,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1083.144826 +594,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiOperationsImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.701503209,27,2720.743357,7.907692308,122,5,4,3,15,9,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1139.786098 +597,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiPolicysImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.652985697,25,2056.929661,7.627118644,103,5,4,3,14,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1010.811838 +601,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiProductsInner.java,36,2,1,9,4.777777778,1,32,70,3.285714286,1,14,0,0,0,0,0,0,0,0,0,0,3,2,12,0,0,0,0,0,0,0,0,0,6,0,0,1,0,0,0,0,0,0,0,0,3.852134193,115,6440.144857,12.37174721,287,12,9,6,24,16,[],[],[],[],[],[],[],[],[],[],"[441, 330, 183]","[39, 41]","[146, 164, 167, 170, 173, 291, 314, 317, 320, 323, 418, 434]",[],[],[],[],[],[],[],[],[],"[176, 177, 178, 179, 326, 437]",[],[],[37],[],[],"[177, 178, 179]",[],[],[],[],6682.982654 +613,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiVersionSetContractInner.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.452228502,31,1309.667032,5.253731343,91,4,0,5,8,5,[],[],[],[],[],[],[],[],[],[],[],"[25, 32, 39, 45, 53]",[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],573.0398702 
+614,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApiVersionSetsImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2621.235664,7.876923077,110,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 +616,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ApisImpl.java,10,1,1,1,1,1,1,28,2.6,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.708445773,33,2138.902324,8.096385542,105,4,5,4,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],1385.206852 +620,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/AuthorizationServersImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2906.651049,7.876923077,116,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 +623,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/BackendContractInner.java,18,1,1,1,1,1,1,55,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.549713958,55,946.3523109,5.109243697,85,4,0,9,8,9,[],[],[],[],[],[],[],[],[],[],[],"[29, 35, 42, 48, 54, 60, 66, 72, 78]",[],[],[],[],[],[],[],[],[],[],[],[],[],[24],[],[],[],[],[],[],[],1134.12825 +624,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/BackendsImpl.java,10,1,1,1,1,1,1,28,2.6,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.672676384,29,2390.548491,7.771428571,104,4,5,4,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1190.318487 +628,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/CachesImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2087.758741,7.876923077,102,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 +630,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/CertificateContractImpl.java,20,1.052631579,1,2,1.052631579,1,2,66,3.052631579,2,8,0,0,0,0,0,0,1,0,2,0,0,6,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.630749256,70,1848.471355,11.64516129,199,12,10,10,13,14,[],[],[],[],[],[],[19],[],"[28, 37]",[],[],"[20, 21, 22, 23, 24, 25]",[91],[],[],[86],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[84],[],[],1679.844226 
+639,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/DiagnosticContractInner.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.493828513,37,1270.848088,5.135802469,103,4,0,6,8,6,[],[],[],[],[],[],[],[],[],[],[],"[28, 34, 40, 46, 52, 59]",[],[],[],[],[],[],[],[],[],[],[],[],[],[22],[],[],[],[],[],[],[],743.3982714 +640,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/DiagnosticsImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.688084704,27,2527.82028,7.876923077,110,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1083.639 +655,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/IdParsingUtils.java,14,7,7,7,12,12,12,31,15,14,16,0,0,0,0,0,2,0,0,0,0,0,0,4,0,0,2,0,0,0,0,0,0,5,0,0,1,0,2,2,2,2,0,0,0,3.470006721,34,571.5208141,10.43478261,66,4,30,0,16,7,[],[],[],[],[],"[24, 46]",[],[],[],[],[],[],"[15, 22, 36, 44]",[],[],"[32, 55]",[],[],[],[],[],[],"[18, 19, 39, 40, 41]",[],[],[13],[],"[14, 35]",[],[],[],[],[],613.1066574 +657,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/IdentityProviderContractInner.java,18,1,1,1,1,1,1,55,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.544175112,55,1545.795756,5.24137931,122,4,0,9,10,9,[],[],[],[],[],[],[],[],[],[],[],"[27, 33, 39, 45, 51, 57, 63, 71, 79]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],1003.27739 +660,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/IssueAttachmentContractImpl.java,19,1.055555556,1,2,1.055555556,1,2,68,3.277777778,2,9,0,0,0,0,0,0,1,0,2,0,0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.511871109,66,2253.08432,10.27522936,232,8,11,11,11,15,[],[],[],[],[],[],[15],[],"[25, 33]",[],[],"[17, 18, 19, 20, 21, 22, 23]",[74],[],[],[],[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[67],[],[],1607.633004 +662,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/IssueCommentContractImpl.java,19,1.055555556,1,2,1.055555556,1,2,68,3.277777778,2,9,0,0,0,0,0,0,1,0,2,0,0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.534820737,66,1948.880098,10.18181818,202,8,11,11,11,15,[],[],[],[],[],[],[16],[],"[26, 34]",[],[],"[18, 19, 20, 21, 22, 23, 24]",[75],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[68],[],[],1656.163989 
+664,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/IssueContractImpl.java,25,1.041666667,1,2,1.041666667,1,2,80,3,2,8,0,0,0,0,0,0,1,0,2,0,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.5228709,78,1142.217375,9.718518519,136,8,17,17,11,16,[],[],[],[],[],[],[17],[],"[26, 34]",[],[],"[19, 20, 21, 22, 23, 24]",[74],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[67],[],[],1857.172754 +668,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/LoggerContractImpl.java,24,1.043478261,1,2,1.043478261,1,2,74,2.913043478,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.548291729,72,1363.709536,10.15873016,169,8,15,15,11,15,[],[],[],[],[],[],[18],[],"[26, 34]",[],[],"[19, 20, 21, 22, 23]",[73],[],[],[68],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[66],[],[],1662.208383 +677,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/NetworkStatusInner.java,25,2.272727273,1,9,5.181818182,1,32,45,3.142857143,1,11,0,0,0,0,0,0,0,0,0,0,2,2,7,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,3.911527,62,7470.704058,11.29411765,368,12,7,6,23,10,[],[],[],[],[],[],[],[],[],[],"[131, 222]","[35, 37]","[118, 121, 124, 206, 209, 212, 215]",[],[],[],[],[],[],[],[],[],"[127, 218]",[],[],[33],[],[],[],[],[],[],[],3162.729612 +678,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/NotificationContractImpl.java,17,1.0625,1,2,1.0625,1,2,55,3,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.555414914,53,1750.865363,11.35483871,195,8,8,8,11,10,[],[],[],[],[],[],[18],[],"[26, 34]",[],[],"[19, 20, 21, 22, 23]",[73],[],[],[68],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[66],[],[],1335.731813 +682,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/NotificationRecipientUsersImpl.java,8,1,1,1,1,1,1,22,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.64764421,21,3304.386054,7.333333333,140,4,3,4,14,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],856.729083 +686,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/OpenIdConnectProvidersImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2997.112587,7.876923077,118,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 
+688,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/OpenidConnectProviderContractImpl.java,24,1.043478261,1,2,1.043478261,1,2,74,2.913043478,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.494117997,72,2332.71329,10.32258065,274,8,15,15,11,15,[],[],[],[],[],[],[16],[],"[24, 32]",[],[],"[17, 18, 19, 20, 21]",[71],[],[],[66],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[64],[],[],1554.045237 +689,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/OpenidConnectProviderContractInner.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.431071498,31,1354.545907,5.333333333,89,4,0,5,8,5,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 42, 48]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],542.8421252 +708,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/PolicySnippetsInner.java,22,2.2,1,7,4,1,16,41,3.25,1,10,0,0,0,0,0,0,0,0,0,0,2,2,6,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,3.886065439,56,7019.255429,10.24,367,12,6,4,19,10,[],[],[],[],[],[],[],[],[],[],"[128, 209]","[35, 37]","[114, 117, 120, 196, 199, 202]",[],[],[],[],[],[],[],[],[],"[123, 124, 205]",[],[],[33],[],[],[124],[],[],[],[],2641.82016 +709,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/PolicysImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.639530276,25,1855.641525,7.593220339,93,4,4,3,14,7,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],950.0526564 +711,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/PortalDelegationSettingsImpl.java,9,1,1,1,1,1,1,21,2.111111111,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.279009978,21,916.3483087,7.441860465,80,4,7,8,7,2,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],427.8545881 +718,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ProductApisImpl.java,8,1,1,1,1,1,1,22,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.699172823,23,2551.247811,7.703703704,131,4,3,4,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],927.6163382 
+720,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ProductContractImpl.java,28,1.037037037,1,2,1.037037037,1,2,84,2.851851852,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.518636735,82,1227.177885,9.846153846,160,8,19,19,11,17,[],[],[],[],[],[],[17],[],"[25, 33]",[],[],"[18, 19, 20, 21, 22]",[72],[],[],[67],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[65],[],[],1789.714075 +734,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/PropertyContractInner.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.500297709,25,948.287987,5.236363636,66,4,0,4,10,4,[],[],[],[],[],[],[],[],[],[],[],"[26, 33, 40, 47]",[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],485.9729501 +747,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/RecipientEmailContractImpl.java,12,1,1,1,1,1,1,44,3.166666667,2,8,0,0,0,0,0,0,1,0,2,0,0,4,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.510530758,41,2380.129376,12,199,8,4,7,11,8,[],[],[],[],[],[],[16],[],"[23, 31]",[],[],"[18, 19, 20, 21]",[71],[],[],[66],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[64],[],[],1151.502344 +757,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/ReportRecordContractImpl.java,28,1,1,1,1,1,1,59,2.035714286,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.205808544,59,608.5372454,7.86440678,81,4,26,27,7,2,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[],[],[],991.876511 +761,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/RequestReportRecordContractImpl.java,20,1,1,1,1,1,1,43,2.05,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.224122654,43,759.0232558,7.813953488,102,4,18,19,7,2,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[],[],[],727.1194771 +772,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/SubscriptionContractInner.java,25,1,1,1,1,1,1,76,2.48,2,3,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.602959344,76,1101.494929,5.232704403,88,4,0,13,8,14,[],[],[],[],[],[],[],[],[],[],[],"[28, 34, 40, 54, 61, 71, 81, 91, 99, 105, 111, 117, 123]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],1430.553768 
+773,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/SubscriptionsImpl.java,11,1,1,1,1,1,1,31,2.636363636,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.670406695,31,2822.712105,7.68,107,4,6,5,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1310.330254 +775,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/TagContractImpl.java,12,1,1,1,1,1,1,47,3.333333333,2,9,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.487090368,44,1878.658772,11.52,148,8,4,7,11,9,[],[],[],[],[],[],[15],[],"[23, 31]",[],[],"[17, 18, 19, 20, 21]",[72],[],[],[],[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[65],[],[],1231.294979 +782,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/TagResourceContractInner.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.36716066,25,1536.833442,5.236363636,97,4,0,4,8,4,[],[],[],[],[],[],[],[],[],[],[],"[25, 31, 37, 43]",[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],495.157557 +791,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/TenantConfigurationSyncStateContractImpl.java,9,1,1,1,1,1,1,21,2.111111111,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.317922301,21,1561.83278,7.619047619,129,4,7,8,7,2,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[15],[],[],[],[],[],[],[],391.3815085 +792,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/TenantConfigurationSyncStateContractInner.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.406747358,43,1238.294944,5.454545455,112,4,0,7,8,7,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 35, 42, 48, 56, 64]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],620.6704666 +803,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2018_06_01_preview/src/main/java/com/microsoft/azure/management/apimanagement/v2018_06_01_preview/implementation/UserIdentitiesInner.java,23,1.769230769,1,9,3.769230769,1,32,47,2.875,1,11,0,0,0,0,0,0,0,0,0,0,2,2,7,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,3.842034582,78,5514.61345,12.33333333,236,12,5,6,24,13,[],[],[],[],[],[],[],[],[],[],"[291, 180]","[39, 41]","[146, 164, 167, 170, 173, 268, 284]",[],[],[],[],[],[],[],[],[],"[176, 287]",[],[],[37],[],[],[],[],[],[],[],4458.866703 
+823,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ApiEntityBaseContract.java,23,1,1,1,1,1,1,70,2.47826087,2,3,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.478239696,70,1344.164483,5.408450704,133,4,0,12,8,13,[],[],[],[],[],[],[],[],[],[],[],"[21, 27, 33, 39, 46, 52, 58, 64, 70, 76, 82, 89]",[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],1037.960235 +838,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ApiManagementServiceBaseProperties.java,26,1,1,1,1,1,1,79,2.307692308,2,3,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.698344928,79,1961.478626,5.023255814,132,4,0,18,10,19,[],[],[],[],[],[],[],[],[],[],[],"[24, 32, 39, 47, 53, 59, 65, 71, 77, 83, 90, 98, 104, 110, 147, 154, 163, 175]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],1639.042094 +862,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ApiTagResourceContractProperties.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.424208984,31,897.2028846,5.5,81,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 43, 49]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],456.6502135 +864,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ApiUpdateContract.java,31,1,1,1,1,1,1,94,2.483870968,2,3,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.590057626,94,1235.631537,5.305699482,129,4,0,16,10,17,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 42, 49, 55, 61, 67, 73, 79, 85, 92, 98, 104, 113, 119]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],1625.721678 +865,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ApiVersionSetContractDetails.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.28292717,37,940.6147368,5.546666667,85,4,0,6,8,6,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 35, 43, 50, 57]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],479.4448513 +877,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/AuthorizationServerUpdateContract.java,32,1,1,1,1,1,1,97,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.632854957,97,1786.027739,5.306532663,134,4,0,16,10,16,[],[],[],[],[],[],[],[],[],[],[],"[26, 33, 42, 51, 57, 65, 73, 79, 85, 92, 99, 105, 113, 120, 127, 133]",[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1750.298052 +888,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/BackendServiceFabricClusterProperties.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.500261741,31,2243.796635,5.5,122,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 41, 
47]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],510.28222 +890,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/BackendUpdateParameters.java,18,1,1,1,1,1,1,55,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.470376452,55,969.540444,5.380530973,88,4,0,9,8,9,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 36, 42, 48, 54, 60, 66, 72]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],859.0506061 +906,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ConnectivityStatusContract.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.442711808,31,1012.702885,5.5,83,4,0,5,8,5,[],[],[],[],[],[],[],[],[],[],[],"[24, 31, 37, 48, 56]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],490.4654347 +918,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/EmailTemplateUpdateParameters.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.440958761,31,1229.528268,5.333333333,113,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 42, 48]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],511.8225751 +926,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/GroupContractProperties.java,9,1,1,1,1,1,1,28,2.444444444,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.404632496,28,856.8720047,5.517241379,73,4,0,5,8,6,[],[],[],[],[],[],[],[],[],[],[],"[21, 27, 34, 40, 49]",[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],421.9925912 +928,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/GroupCreateParameters.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.376482306,25,861.7009085,5.433962264,66,4,0,4,8,4,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 35, 44]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],400 +931,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/GroupUpdateParameters.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.334226614,25,807.814116,5.433962264,66,4,0,4,8,4,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 35, 44]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],382.7374665 +935,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/IdentityProviderBaseParameters.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.413245449,43,1308.317926,5.454545455,101,4,0,7,10,7,[],[],[],[],[],[],[],[],[],[],[],"[23, 29, 35, 41, 47, 53, 59]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],615 
+943,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/IssueUpdateContract.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.413439087,37,710.3609218,5.333333333,66,4,0,6,8,6,[],[],[],[],[],[],[],[],[],[],[],"[24, 31, 37, 43, 49, 55]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],596.0559466 +948,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/LoggerUpdateContract.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.484250996,25,980.0653199,5.333333333,78,4,0,4,10,4,[],[],[],[],[],[],[],[],[],[],[],"[25, 31, 37, 44]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],442.6092672 +956,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/OperationDisplay.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.246206256,25,622.3770739,5.647058824,61,4,0,4,8,4,[],[],[],[],[],[],[],[],[],[],[],"[21, 27, 33, 39]",[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],321.0790765 +957,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/OperationEntityBaseContract.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.46793057,31,1179.702885,5.5,103,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 40, 46]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],471.0614972 +959,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/OperationTagResourceContractProperties.java,9,1,1,1,1,1,1,28,2.111111111,2,3,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.37063405,28,1267.921635,5,77,4,0,8,8,9,[],[],[],[],[],[],[],[],[],[],[],"[21, 27, 33, 39, 45, 51, 58, 66]",[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],521.7488087 +961,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ParameterContract.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.441366342,37,689.9316473,5.473684211,64,4,0,6,10,6,[],[],[],[],[],[],[],[],[],[],[],"[22, 28, 34, 40, 46, 52]",[],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[],[],[],554.8699868 +970,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ProductEntityBaseParameters.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.369959549,37,1035.46807,5.546666667,91,4,0,6,8,6,[],[],[],[],[],[],[],[],[],[],[],"[21, 29, 41, 52, 61, 70]",[],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[],495.5140615 
+977,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/ProductUpdateParameters.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.425306428,43,1040.387516,5.393258427,87,4,0,7,8,7,[],[],[],[],[],[],[],[],[],[],[],"[23, 31, 43, 54, 63, 72, 78]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],658.9788966 +982,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/PropertyUpdateParameters.java,8,1,1,1,1,1,1,25,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.404389007,25,812.1393939,5.333333333,69,4,0,4,10,4,[],[],[],[],[],[],[],[],[],[],[],"[25, 32, 39, 46]",[],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[],[],[],420 +1005,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/SubscriptionCreateParameters.java,14,1,1,1,1,1,1,43,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.441311832,43,1020.387516,5.393258427,76,4,0,7,8,7,[],[],[],[],[],[],[],[],[],[],[],"[24, 30, 36, 43, 50, 66, 72]",[],[],[],[],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[],[],[],684.8243127 +1047,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiDiagnosticsImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.727279122,31,2915.591925,8.376811594,130,5,4,3,18,9,[],[],[],[],[79],[76],[],[],[],[],[],[],[76],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1225.791038 +1053,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiIssueAttachmentsImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.734121196,31,3663.475983,8.376811594,154,5,4,3,18,10,[],[],[],[],[79],[76],[],[],[],[],[],[],[76],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1300.805936 +1055,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiIssueCommentsImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.734121196,31,3377.389027,8.376811594,151,5,4,3,18,10,[],[],[],[],[79],[76],[],[],[],[],[],[],[76],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1300.805936 +1061,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiManagementOperationsInner.java,17,1.307692308,1,3,1.615384615,1,4,40,2.4375,1,5,0,0,0,0,0,0,0,0,0,0,2,2,4,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,3.743741849,69,3710.998391,12.55248619,203,12,2,6,24,9,[],[],[],[],[],[],[],[],[],[],"[265, 154]","[38, 40]","[133, 148, 242, 258]",[],[],[],[],[],[],[],[],[],[261],[],[],[36],[],[],[],[],[],[],[],3796.246776 
+1066,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiManagementServiceResourceImpl.java,30,1,1,1,1,1,1,63,2.033333333,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.351028557,63,853.0897059,7.348148148,88,4,28,29,9,2,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[24],[],[],[],[],[],[],[],1482.365421 +1069,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiManagementServiceSkusInner.java,23,1.769230769,1,9,3.769230769,1,32,46,2.8125,1,10,0,0,0,0,0,0,0,0,0,0,2,2,7,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,3.814815896,78,5921.102067,12.39790576,247,12,5,6,24,11,[],[],[],[],[],[],[],[],[],[],"[179, 295]","[39, 41]","[146, 164, 167, 170, 173, 271, 288]",[],[],[],[],[],[],[],[],[],[291],[],[],[37],[],[],[],[],[],[],[],4278.431282 +1072,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiOperationPolicysImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.693880518,29,2908.626736,8.158730159,129,5,4,3,18,9,[],[],[],[],[73],[70],[],[],[],[],[],[],[70],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1168.590142 +1076,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiPolicyContractImpl.java,17,1.0625,1,2,1.0625,1,2,58,3.1875,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.522979376,56,1483.090206,11,161,8,9,9,11,12,[],[],[],[],[],[],[16],[],"[24, 32]",[],[],"[18, 19, 20, 21, 22]",[71],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[64],[],[],1370.729153 +1077,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiPolicysImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.686354501,29,2067.610863,8.158730159,103,5,4,3,18,8,[],[],[],[],[73],[70],[],[],[],[],[],[],[70],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1094.778091 +1081,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiProductsInner.java,40,2.222222222,1,11,8.333333333,1,64,72,3.380952381,1,15,0,0,0,0,0,0,0,0,0,0,3,2,14,0,0,0,0,0,0,0,0,0,4,0,0,1,0,0,0,0,0,0,0,0,3.848330423,121,6554.447702,12.42490842,287,12,11,6,24,15,[],[],[],[],[],[],[],[],[],[],"[185, 435, 324]","[39, 41]","[146, 164, 167, 170, 173, 176, 285, 306, 309, 312, 315, 318, 412, 428]",[],[],[],[],[],[],[],[],[],"[179, 180, 181, 431]",[],[],[37],[],[],"[179, 180, 181]",[],[],[],[],6724.707832 
+1092,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiTagDescriptionsImpl.java,9,1,1,1,1.111111111,1,2,25,2.555555556,2,3,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.727279122,31,2872.490476,8.376811594,131,5,4,3,18,9,[],[],[],[],[79],[76],[],[],[],[],[],[],[76],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1225.791038 +1094,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiVersionSetContractImpl.java,24,1.043478261,1,2,1.043478261,1,2,74,2.913043478,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.512234276,72,1664.795746,10.24,202,8,15,15,11,15,[],[],[],[],[],[],[17],[],"[25, 33]",[],[],"[18, 19, 20, 21, 22]",[72],[],[],[67],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[65],[],[],1595.724611 +1095,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/ApiVersionSetContractInner.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.452228502,31,1302.316725,5.253731343,91,4,0,5,8,5,[],[],[],[],[],[],[],[],[],[],[],"[25, 32, 39, 45, 53]",[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],573.0398702 +1106,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/BackendsImpl.java,10,1,1,1,1,1,1,28,2.6,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.672676384,29,2356.758551,7.771428571,104,4,5,4,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1190.318487 +1114,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/CertificatesImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2495.145221,7.876923077,108,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 +1119,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/DiagnosticContractInner.java,12,1,1,1,1,1,1,37,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.493828513,37,1262.751731,5.135802469,103,4,0,6,8,6,[],[],[],[],[],[],[],[],[],[],[],"[28, 34, 40, 46, 52, 59]",[],[],[],[],[],[],[],[],[],[],[],[],[],[22],[],[],[],[],[],[],[],743.3982714 +1120,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/DiagnosticsImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.688084704,27,2489.237529,7.876923077,110,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1083.639 
+1123,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/EmailTemplateContractInner.java,11,1,1,1,1,1,1,34,2.454545455,2,3,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.582787648,34,1458.518421,5.12,110,4,0,6,10,7,[],[],[],[],[],[],[],[],[],[],[],"[26, 32, 38, 44, 51, 57]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],707.0159304 +1130,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/GroupUserContractImpl.java,21,1.105263158,1,3,1.105263158,1,3,63,3,2,8,0,0,0,0,0,0,1,0,2,0,0,4,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.59901648,61,1315.470962,10.37837838,159,8,14,13,15,9,[],[],[],[],[],[],[21],[],"[28, 36]",[],[],"[23, 24, 25, 26]","[76, 103]",[],[],[71],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[69],[],[],1727.579 +1137,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/IdentityProviderContractInner.java,18,1,1,1,1,1,1,55,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.544175112,55,1536.652225,5.24137931,122,4,0,9,10,9,[],[],[],[],[],[],[],[],[],[],[],"[27, 33, 39, 45, 51, 57, 63, 71, 79]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],1003.27739 +1142,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/IssueCommentContractImpl.java,19,1.055555556,1,2,1.055555556,1,2,68,3.277777778,2,9,0,0,0,0,0,0,1,0,2,0,0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,6,0,0,0,3.534820737,66,1946.952498,10.18181818,202,8,11,11,11,15,[],[],[],[],[],[],[16],[],"[26, 34]",[],[],"[18, 19, 20, 21, 22, 23, 24]",[75],[],[],[],[],[],[],[],[],[],[],[],[],[16],[],[],[],[],[68],[],[],1656.163989 +1144,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/IssueContractImpl.java,25,1.041666667,1,2,1.041666667,1,2,80,3,2,8,0,0,0,0,0,0,1,0,2,0,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.5228709,78,1149.16378,9.718518519,136,8,17,17,11,16,[],[],[],[],[],[],[17],[],"[26, 34]",[],[],"[19, 20, 21, 22, 23, 24]",[74],[],[],[],[],[],[],[],[],[],[],[],[],[17],[],[],[],[],[67],[],[],1857.172754 +1147,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/IssuesInner.java,51,2.217391304,1,11,6.695652174,1,64,89,3.296296296,1,13,0,0,0,0,0,0,0,0,0,0,4,2,17,0,0,0,0,0,0,0,0,0,4,0,0,1,0,0,0,0,0,0,0,0,3.883060053,147,6378.607099,12.56441718,276,12,14,6,24,16,[],[],[],[],[],[],[],[],[],[],"[184, 315, 408, 519]","[42, 44]","[149, 166, 169, 172, 175, 280, 300, 303, 306, 309, 390, 393, 396, 399, 402, 496, 512]",[],[],[],[],[],[],[],[],[],"[178, 179, 180, 515]",[],[],[40],[],[],"[178, 179, 180]",[],[],[],[],8136.407342 
+1148,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/LoggerContractImpl.java,24,1.043478261,1,2,1.043478261,1,2,74,2.913043478,2,7,0,0,0,0,0,0,1,0,2,0,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.548291729,72,1348.046494,10.15873016,169,8,15,15,11,15,[],[],[],[],[],[],[18],[],"[26, 34]",[],[],"[19, 20, 21, 22, 23]",[73],[],[],[68],[],[],[],[],[],[],[],[],[],[18],[],[],[],[],[66],[],[],1662.208383 +1149,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/LoggerContractInner.java,10,1,1,1,1,1,1,31,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.561074241,31,1040.243393,5.176470588,77,4,0,5,10,5,[],[],[],[],[],[],[],[],[],[],[],"[27, 33, 41, 48, 55]",[],[],[],[],[],[],[],[],[],[],[],[],[],[21],[],[],[],[],[],[],[],637.5486885 +1150,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/LoggersImpl.java,9,1,1,1,1,1,1,25,2.555555556,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.679467652,27,2119.60676,7.876923077,103,4,4,3,15,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],1077.897763 +1160,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/NotificationRecipientEmailsImpl.java,8,1,1,1,1,1,1,22,2.5,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3.666699414,21,3235.909014,7.333333333,141,4,3,4,14,8,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[20],[],[],[],[],[],[],[],861.6756651 +1171,/hdd/aibolit/aibolit/scripts/target/01/Azure/azure-sdk-for-java/sdk/apimanagement/mgmt-v2019_01_01/src/main/java/com/microsoft/azure/management/apimanagement/v2019_01_01/implementation/OperationContractImpl.java,29,1.035714286,1,2,1.035714286,1,2,90,2.928571429,2,8,1,1,1,1,1,1,1,1,2,1,1,6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,3.560474264,88,1362.148346,9.290322581,164,8,21,21,11,19,[],[],[],[],[],[],[19],[],"[28, 36]",[],[],"[21, 22, 23, 24, 25, 26]",[76],[],[],[],[],[],[],[],[],[],[],[],[],[19],[],[],[],[],[69],[],[],2161.78292 diff --git a/test/stats/test_stats.py b/test/stats/test_stats.py new file mode 100644 index 00000000..3267188a --- /dev/null +++ b/test/stats/test_stats.py @@ -0,0 +1,103 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import math
+import os
+from pathlib import Path
+from unittest import TestCase
+
+import numpy as np
+import pandas as pd
+
+from aibolit.config import Config
+from aibolit.model.stats import Stats
+from aibolit.model.model import get_minimum, generate_fake_dataset
+
+
+class TestStats(TestCase):
+
+    def __init__(self, *args, **kwargs):
+        super(TestStats, self).__init__(*args, **kwargs)
+        self.cur_file_dir = Path(os.path.realpath(__file__)).parent
+        self.config = Config.get_patterns_config()
+
+    def test_get_minimum(self):
+        minimum_arr = get_minimum([0, 0.23, 0.45], [0.34, 0.01, 0.37], [0.01, 0.50, 0.2])
+        self.assertTrue(np.array_equal(minimum_arr[0], np.array([0, 0.01, 0.2])))
+        self.assertTrue(np.array_equal(minimum_arr[1], np.array([0, 1, 2])))
+
+    def test_get_array(self):
+        lst = [np.array([0, 1, 2, 3, 4, 5], dtype=float),
+               np.array([0, 1, 2, 3, 3, 3], dtype=float)]
+        x = np.array(lst)
+        mask = x > 0
+        ncss = np.array([0.01, 0.02])
+        res = Stats.change_matrix_by_value(x, mask, 2, ncss)
+        self.assertTrue(
+            np.array_equal(
+                res,
+                np.array([[0., 1., 2.01, 3., 4., 5.],
+                          [0., 1., 2.02, 3., 3., 3.]])
+            )
+        )
+
+    def test_split_dataset_by_pattern_value(self):
+        x = [[0, 0, 0], [0, 0, 1], [1, 1, 2]]
+        nulls, not_nulls = Stats.split_dataset_by_pattern_value(x, 2)
+        self.assertTrue(np.array_equal(nulls[0], np.array([0, 0, 0])))
+        self.assertTrue(np.array_equal(not_nulls[0], np.array([0, 0, 1])))
+        self.assertTrue(np.array_equal(not_nulls[1], np.array([1, 1, 2])))
+
+    def __load_mock_model(self):
+        config = Config.get_patterns_config()
+        patterns = [x['code'] for x in config['patterns']]
+
+        class MockModel:
+
+            def predict(self, input: np.array) -> np.array:
+                results = []
+                for row in input:
+                    s = sum(row)
+                    radian = math.radians(s)
+                    results.append(math.sin(radian))
+                return np.array(results)
+
+        class PatternRankingModel:
+
+            def __init__(self):
+                self.features_conf = {
+                    'features_order': patterns,
+                    'patterns_only': patterns
+                }
+                self.model = MockModel()
+
+        return PatternRankingModel()
+
+    def test_stat_aibolit_pipeline(self):
+        model = self.__load_mock_model()
+        test_df = generate_fake_dataset()
+        table = Stats.aibolit_stat(test_df, model)
+        test_csv = Path(self.cur_file_dir, 'results_test.csv')
+        results_df = pd.read_csv(test_csv, index_col=0)
+        all_elements_compared: pd.DataFrame = table.eq(results_df)
+        bool_eq_elems = np.ravel(all_elements_compared.values)
+        are_equal_arrays = np.logical_and.reduce(bool_eq_elems, axis=0)
+        self.assertTrue(are_equal_arrays)
diff --git a/test/stats/train_mock.csv b/test/stats/train_mock.csv
new file mode 100644
index 00000000..9f212f98
--- /dev/null
+++ b/test/stats/train_mock.csv
@@ -0,0 +1,25 @@
+,filename,cyclo,cyclo_method_avg,cyclo_method_min,cyclo_method_max,npath_method_avg,npath_method_min,npath_method_max,ncss,ncss_method_avg,ncss_method_min,ncss_method_max,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15,P16,P17,P18,P19,P20_5,P20_7,P20_11,P21,P22,P23,P24,P25,P26,P28,P29,P30,P31,P32,M1,M2,M3_1,M3_2,M3_3,M3_4,M4,M5,M6,M7,lines_P1,lines_P2,lines_P3,lines_P4,lines_P5,lines_P6,lines_P7,lines_P8,lines_P9,lines_P10,lines_P11,lines_P12,lines_P13,lines_P14,lines_P15,lines_P16,lines_P17,lines_P18,lines_P19,lines_P20_5,lines_P20_7,lines_P20_11,lines_P21,lines_P22,lines_P23,lines_P24,lines_P25,lines_P26,lines_P28,lines_P29,lines_P30,lines_P31,lines_P32,halstead volume +0,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/android/src/mindustry/android/AndroidLauncher.java,19.0,4.75,1.0,6.0,1288.0,1.0,5100.0,60.0,13.75,3.0,21.0,0,0,0,0,2,0,0,0,0,3,3,3,7,0,0,0,4,5,0,0,0,0,7,1,0,1,0,0,2,0,1,1,0,4.219471310473399,137,1268.8431472947602,19.703703703703702,145,24,28,2,37,56,[],[],[],[],"[59, 247]",[],[],[],[],"[116, 169, 170]","[80, 195, 39]","[34, 35, 36]","[86, 86, 185, 188, 198, 211, 248]",[],[],[],"[116, 169, 216, 229]","[40, 66, 131, 66, 131]",[],[],[],[],"[62, 122, 155, 208, 209, 210, 216]",[180],[],[32],[],[],"[190, 199]",[],[39],[155],[],5152.7016229680485 +1,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/impl/AssetsProcess.java,13.0,3.25,1.0,7.0,16.0,1.0,48.0,57.0,14.0,4.0,31.0,0,0,0,0,1,0,0,0,0,11,0,0,0,0,0,0,21,0,0,11,10,7,9,0,0,1,0,0,0,0,0,0,0,4.040301402817462,70,2893.837231352718,6.571428571428571,199,8,18,1,28,38,[],[],[],[],[75],[],[],[],[],"[36, 42, 54, 72, 81, 82, 86, 95, 117, 121, 126]",[],[],[],[],[],[],"[21, 22, 33, 34, 42, 44, 45, 48, 50, 51, 68, 75, 100, 106, 109, 111, 113, 115, 116, 117, 121]",[],[],"[121, 125, 124, 84, 36, 42, 84, 85, 45, 54, 109]","[121, 125, 124, 84, 42, 84, 85, 45, 54, 109]","[121, 125, 124, 84, 84, 85, 54]","[57, 60, 61, 63, 73, 96, 97, 109, 111]",[],[],[17],[],[],[],[],[],[],[],4990.13368208755 +6,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/remote/MethodEntry.java,2.0,1.0,1.0,1.0,1.0,1.0,1.0,24.0,6.5,2.0,11.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3.371730262636704,24,825.9303135888501,16.243902439024392,96,19,1,11,6,10,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[8],[],[],[],[],[],[],[],380.7356171694285 +7,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/remote/RemoteProcess.java,8.0,8.0,8.0,8.0,15.0,15.0,15.0,50.0,37.0,37.0,37.0,0,0,0,0,1,0,0,0,0,4,0,5,0,0,0,0,0,0,0,1,1,1,7,0,0,1,0,0,0,0,0,0,0,4.043098946005742,44,1823.383800410116,46.73684210526316,109,16,14,2,20,34,[],[],[],[],[93],[],[],[],[],"[108, 110, 115, 120]",[],"[34, 36, 38, 40, 42]",[],[],[],[],[],[],[],[93],[93],[93],"[55, 65, 89, 92, 100, 113, 119]",[],[],[20],[],[],[],[],[],[],[],2049.1658652327537 +8,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/remote/RemoteReadGenerator.java,18.0,9.0,1.0,17.0,322.5,1.0,644.0,59.0,28.5,2.0,55.0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,8,0,0,2,2,1,6,0,0,1,0,0,0,0,0,0,1,3.9442094837623536,56,4245.186813186813,109.71428571428572,202,28,36,2,30,30,[],[],[],[14],[108],[],[],[],[],[135],[],[],[88],[],[],[],"[59, 61, 79, 89, 94, 108, 113, 118]",[],[],"[49, 98]","[49, 98]",[98],"[35, 47, 52, 53, 67, 134]",[],[],[14],[],[],[],[],[],[],[70],2541.687817865287 
+11,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/util/Selement.java,24.0,1.1428571428571428,1.0,4.0,1.1428571428571428,1.0,3.0,49.0,2.238095238095238,2.0,7.0,0,0,0,0,6,0,0,3,0,0,0,0,3,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,3.816321652531544,47,1143.1245656241647,8.930232558139535,103,4,9,7,13,7,[],[],[],[],"[35, 39, 43, 66, 90, 109]",[],[],"[47, 51, 55]",[],[],[],[],"[67, 74, 109]",[],[],[67],[],[],[],[],[],[],[66],[],[],[15],[],[],[],[],[],[],[],1346.8203997078442 +13,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/annotations/src/main/java/mindustry/annotations/util/Stype.java,12.0,1.0,1.0,1.0,1.0,1.0,1.0,25.0,2.0,2.0,2.0,0,0,0,0,5,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,3.4270363268324537,25,2676.685990338164,9.244444444444444,127,4,0,6,14,6,[],[],[],[],"[16, 40, 44, 49, 54]",[],[],"[44, 48, 53]",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[9],[],[15],[],[],[],[],[],923.3694326071593 +18,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/formations/Formation.java,25.0,2.083333333333333,1.0,4.0,2.333333333333333,1.0,8.0,82.0,6.083333333333332,2.0,17.0,0,0,0,0,0,0,0,0,0,1,0,5,3,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,2,0,3.840713117155383,86,1275.3528711484594,31.159663865546218,115,20,14,3,15,24,[],[],[],[],[],[],[],[],[],[201],[],"[22, 25, 27, 29, 31]","[93, 195, 223]",[],[],[],[],[],[],[],[],[],[194],[],[],[19],[],[],[],[],[],"[47, 58]",[],1867.8865533846488 +24,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/ai/types/FormationAI.java,7.0,1.4,1.0,3.0,1.4,1.0,3.0,21.0,3.4,2.0,8.0,0,0,0,0,0,0,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3.5183349851231367,23,754.6543385490753,19.027027027027028,68,16,4,4,12,6,[],[],[],[],[],[],[],[],[],[38],[],"[9, 11, 12]",[],[],[],[],[],[],[],[],[],[],[],[],[],[8],[],[],[],[],[],[],[],519.2347694592746 +26,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/async/AsyncLogic.java,13.0,2.8,1.0,5.0,4.25,3.0,7.0,34.0,6.5,3.0,10.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,3.6515547347096367,51,685.1384615384616,26.5,88,16,16,2,14,12,[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[23],[],[],[12],[],[],[],[],[],[],[],717.6824520221446 +29,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/content/Bullets.java,1.0,1.0,1.0,1.0,576.0,576.0,576.0,53.0,51.0,51.0,51.0,0,0,0,0,0,0,0,1,0,2,0,7,1,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,4.244297456741329,448,700.591186258103,6.480286738351254,178,20,20,1,9,145,[],[],[],[],[],[],[],[594],[],"[413, 534]",[],"[17, 496, 497, 498, 499, 500, 501]",[427],[],[],[],[],"[643, 643]",[],[],[],[],[],[],[],[16],[],[],[],[],[],[],[535],16124.20764779684 +41,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/core/GameState.java,12.0,1.0,1.0,1.0,1.1666666666666667,1.0,3.0,36.0,2.0,1.0,3.0,0,0,0,0,0,0,0,0,0,0,0,9,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3.783672049231015,39,786.7503912363068,6.309859154929577,77,4,6,12,10,20,[],[],[],[],[],[],[],[],[],[],[],"[15, 17, 19, 21, 23, 25, 27, 29, 31]","[44, 44, 48]",[],[],[],[],[],[],[],[],[],[],[],[],[13],[],[],[],[],[],[],[],882.2992752062506 
+42,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/core/Logic.java,46.0,5.111111111111111,1.0,17.0,118.11111111111113,1.0,512.0,83.0,9.111111111111107,3.0,19.0,0,0,0,0,0,0,0,1,0,5,0,0,2,0,0,0,0,0,0,0,0,0,3,0,0,1,0,2,1,0,0,0,2,3.868603278351877,141,1209.3145872129112,61.13966480446928,155,36,70,5,21,49,[],[],[],[],[],[],[],[52],[],"[113, 115, 188, 189, 191]",[],[],"[151, 158]",[],[],[],[],[],[],[],[],[],"[47, 48, 65]",[],[],[30],[],"[171, 202]",[147],[],[],[],"[114, 187]",4333.57168668884 +47,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/core/Version.java,7.0,7.0,7.0,7.0,24.0,24.0,24.0,25.0,18.0,18.0,18.0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,1,0,0,0,0,0,3.765648645147874,23,1281.32868757259,4.682926829268292,109,4,8,1,18,12,[],[],[],[],[],[],[],[],[],[],[],"[12, 14, 16, 18, 20, 22]",[],[],[],[],[],[],[],[],[],[],"[27, 29]",[],[],[10],[],[24],[],[],[],[],[],729.65628392206 +48,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/editor/EditorTile.java,36.0,3.272727272727273,1.0,8.0,15.818181818181818,1.0,72.0,83.0,7.454545454545454,2.0,19.0,0,0,0,0,3,0,0,1,0,1,0,0,2,0,0,0,0,14,0,0,0,0,1,0,0,1,0,0,1,0,2,0,0,3.6282044047575535,92,468.92664092664086,8.936936936936934,77,16,31,5,13,18,[],[],[],[],"[56, 120, 134]",[],[],[27],[],[45],[],[],"[119, 120]",[],[],[],[],"[23, 23, 43, 51, 51, 64, 64, 76, 76, 88, 88, 100, 106, 113]",[],[],[],[],[122],[],[],[14],[],[],[117],[],"[99, 111]",[],[],1665.6786252099384 +51,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/editor/MapLoadDialog.java,6.0,3.0,1.0,5.0,7.0,2.0,12.0,40.0,19.0,9.0,29.0,0,0,0,0,0,0,0,0,0,6,0,1,0,0,0,0,0,0,0,3,3,3,6,0,0,1,0,0,1,0,0,0,0,3.680120135368997,40,470.7731397459165,5.052631578947368,61,4,6,1,14,14,[],[],[],[],[],[],[],[],[],"[30, 48, 57, 58, 60, 67]",[],[14],[],[],[],[],[],[],[],"[61, 63, 63]","[61, 63, 63]","[61, 63, 63]","[21, 41, 43, 45, 47, 51]",[],[],[13],[],[],[14],[],[],[],[],1185.737323185475 +54,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/editor/MapSaveDialog.java,6.0,1.5,1.0,2.0,2.75,1.0,6.0,27.0,6.0,2.0,12.0,0,0,0,0,0,1,0,0,0,4,0,2,1,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,3.6360127006993848,44,416.1099947117927,7.868852459016392,48,4,5,3,11,9,[],[],[],[],[],[27],[],[],[],"[24, 36, 37, 40]",[],"[12, 13]",[71],[],[],[],[],[],[],[],[],[],"[25, 43, 70]",[],[],[11],[],[],[],[],[],[],[],861.3019484057222 +56,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/editor/OperationStack.java,10.0,1.4285714285714286,1.0,2.0,1.4285714285714286,1.0,2.0,28.0,3.4285714285714284,1.0,6.0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3.4400103938312037,29,459.40090090090087,7.1111111111111125,50,4,8,2,10,5,[],[],[],[],[],[],[],[],[],"[40, 48]",[],"[7, 8]",[],[],[],[],[],[],[],[],[],[],[],[],[],[5],[],[],[],[],[],[],[],489.30622957777 +57,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/editor/WaveInfoDialog.java,9.0,1.8,1.0,3.0,127.2,3.0,578.0,44.0,7.4,5.0,12.0,0,0,0,0,0,0,0,0,0,36,0,5,2,0,0,0,3,0,0,1,1,0,3,0,0,1,0,0,0,0,0,2,2,3.963174372017297,188,1072.3006470881037,7.183673469387755,140,4,5,2,29,54,[],[],[],[],[],[],[],[],[],"[51, 55, 56, 62, 86, 87, 89, 101, 102, 104, 116, 118, 141, 142, 143, 145, 146, 151, 157, 158, 170, 171, 177, 182, 190, 196, 200, 203, 205, 225, 227, 247, 248, 260, 261, 267]",[],"[29, 31, 32, 33, 34]","[90, 139]",[],[],[],"[171, 182, 261]",[],[],[235],[235],[],"[52, 223, 
251]",[],[],[27],[],[],[],[],[],"[141, 200]","[253, 257]",7009.068358581604 +58,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/Effects.java,25.0,3.571428571428572,1.0,6.0,5.857142857142858,1.0,12.0,58.0,8.0,2.0,17.0,0,0,0,0,1,0,0,1,0,2,0,0,4,0,0,0,2,0,0,0,0,0,6,0,0,1,1,6,0,0,0,0,0,3.9047476341197647,52,1437.614241001565,4.957746478873238,109,4,28,2,15,25,[],[],[],[],[52],[],[],[52],[],"[51, 65]",[],[],"[26, 59, 74, 87]",[],[],[],"[78, 89]",[],[],[],[],[],"[28, 61, 73, 78, 86, 89]",[],[],[16],[19],"[25, 34, 38, 58, 70, 83]",[],[],[],[],[],2081.416110476498 +61,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/Fires.java,14.0,2.333333333333333,1.0,4.0,4.166666666666667,1.0,12.0,35.0,5.333333333333333,2.0,14.0,0,0,0,0,0,0,0,0,0,1,0,0,4,0,0,0,0,0,0,0,0,0,2,0,0,1,0,6,0,0,0,0,0,3.6001000050049035,36,984.2882205513783,6.857142857142858,111,4,23,6,13,8,[],[],[],[],[],[],[],[],[],[68],[],[],"[20, 24, 46, 53]",[],[],[],[],[],[],[],[],[],"[22, 45]",[],[],[14],[],"[19, 37, 41, 52, 63, 67]",[],[],[],[],[],1231.7750872171237 +62,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/Lightning.java,7.0,3.5,1.0,6.0,22.0,1.0,43.0,40.0,15.5,2.0,29.0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,5,0,0,1,1,1,0,0,0,1,0,4.01111424068341,50,1632.385336743393,23.52941176470589,144,24,12,3,23,29,[],[],[],[],[],[],[],[],[],"[53, 61]",[],"[22, 23]","[50, 73]",[],[],[],[],[],[],[],[],[],"[36, 45, 46, 49, 71]",[],[],[15],[32],[26],[],[],[],[40],[],1913.559972534845 +63,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/Predict.java,13.0,2.6,1.0,5.0,2.8,1.0,5.0,44.0,8.2,2.0,17.0,0,0,0,0,2,0,0,1,0,0,0,2,1,0,0,0,0,0,0,1,0,0,6,0,0,1,1,4,1,0,0,0,0,3.7912519018280944,39,1931.2462877624166,11.35483870967742,165,8,17,3,21,25,[],[],[],[],"[61, 62]",[],[],[60],[],[],[],"[12, 13]",[42],[],[],[],[],[],[],[47],[],[],"[29, 33, 34, 35, 38, 41]",[],[],[11],[74],"[26, 54, 58, 70]",[75],[],[],[],[],1899.2884064701057 +64,/hdd/aibolit/aibolit/scripts/target/01/Anuken/Mindustry/core/src/mindustry/entities/Puddles.java,31.0,3.875,1.0,12.0,11.375,1.0,72.0,61.0,7.25,2.0,28.0,0,0,0,0,0,1,0,0,0,1,0,0,4,0,0,0,0,0,0,0,0,0,2,0,0,1,2,6,0,0,0,0,0,3.765813741088862,66,2012.1192886456906,8.658823529411764,104,8,50,4,20,21,[],[],[],[],[],[88],[],[],[],[77],[],[],"[33, 41, 49, 71]",[],[],[],[],[],[],[],[],[],"[39, 48]",[],[],[12],"[81, 106]","[18, 23, 28, 32, 70, 76]",[],[],[],[],[],2682.314987472541 diff --git a/test/utils/MethodUseOtherMethodExample.java b/test/utils/MethodUseOtherMethodExample.java index 27b2a966..f7d0b9a4 100644 --- a/test/utils/MethodUseOtherMethodExample.java +++ b/test/utils/MethodUseOtherMethodExample.java @@ -1,3 +1,6 @@ +/** +* Some documentation +*/ class MethodUseOtherMethod { private int connectingField = 0; private int redundantField = 0; diff --git a/test/utils/test_ast_node.py b/test/utils/test_ast_node.py new file mode 100644 index 00000000..a030ba73 --- /dev/null +++ b/test/utils/test_ast_node.py @@ -0,0 +1,62 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Aibolit +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject 
to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from unittest import TestCase +from pathlib import Path + +from aibolit.ast_framework import AST, ASTNodeType +from aibolit.utils.ast_builder import build_ast + + +class ASTNodeTestSuite(TestCase): + def test_fields(self): + ast = AST.build_from_javalang( + build_ast( + Path(__file__).absolute().parent / "MethodUseOtherMethodExample.java" + ) + ) + package = ast.get_root() + assert len(package.types) == 1 and \ + package.types[0].node_type == ASTNodeType.CLASS_DECLARATION + + java_class = package.types[0] + self.assertEqual(java_class.name, "MethodUseOtherMethod") + self.assertEqual(java_class.modifiers, set()) + self.assertEqual(java_class.documentation, "/**\n* Some documentation\n*/") + + fields_names = {field.name for field in java_class.fields} + self.assertEqual(fields_names, {"connectingField", "redundantField"}) + + methods_names = {method.name for method in java_class.methods} + self.assertEqual( + methods_names, + { + "useOnlyMethods1", + "useOnlyMethods2", + "getField", + "setField", + "standAloneMethod", + "shadowing", + }, + ) + + self.assertEqual(set(java_class.constructors), set()) diff --git a/test/utils/test_java_class_decomposition.py b/test/utils/test_java_class_decomposition.py index 8caaa7b2..3347ab17 100644 --- a/test/utils/test_java_class_decomposition.py +++ b/test/utils/test_java_class_decomposition.py @@ -20,13 +20,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from unittest import TestCase +from unittest import TestCase, skip from pathlib import Path from aibolit.ast_framework.java_class_decomposition import decompose_java_class from aibolit.ast_framework.java_package import JavaPackage +@skip("Usage of deprecated API breaks test") class JavaClassDecompositionTestSuite(TestCase): def test_strong_decomposition(self): java_package = JavaPackage(Path(__file__).parent.absolute() / 'MethodUseOtherMethodExample.java') diff --git a/uml/classes_diagram.png b/uml/classes_diagram.png new file mode 100644 index 00000000..0ac6c146 Binary files /dev/null and b/uml/classes_diagram.png differ diff --git a/uml/classes_diagram.puml b/uml/classes_diagram.puml new file mode 100644 index 00000000..4e6f4315 --- /dev/null +++ b/uml/classes_diagram.puml @@ -0,0 +1,68 @@ +@startuml + +set namespaceSeparator :: + +model::model::PatternRankingModel <|-- sklearn.base.BaseEstimator + +class model::stats::Stats { +{method} {static} + aibolit_stat(test_csv: pd.DataFrame, model=None) -> pd.DataFrame +{method} {static} + count_acts(acts: np.array, ranked: np.array) -> Tuple[np.array, np.array] +{method} {static} + get_patterns_name() -> Dict[Any, Any] +{method} {static} + get_table(features_conf: Dict[Any, Any], m: np.array, p: np.array, acts_complexity) -> pd.DataFrame +{method} {static} + split_dataset_by_pattern_value(X: np.array, pattern_idx: int) -> Tuple[np.array, np.array] +{method} {static} + change_matrix_by_value(arr: np.array, mask: np.array, i: int, incr: np.array) -> np.array +{method} {static} + check_impact(X: np.array, model_input: Any) -> Tuple[np.array, np.array, np.array, np.array] +} + +class model::model::PatternRankingModel { +{field} +bool: do_rename_columns +{field} +CatBoost: model +{field} +Dict[Any, Any]: features_conf + +{method} +fit_regressor(self, X, y, display=False) -> None +{method} +sigmoid(self, x) -> float +{method} -__get_pairs(self, item, th: float, feature_importances=None) -> Tuple[np.array, np.array] +{method} -__vstack_arrays(self, res) -> np.array +{method} +calculate_score(self, X, quantity_func='log', th=1.0, feature_importances=None) -> Tuple[np.array, np.array] +{method} +rank(self, snippet, scale=True, th=1) -> Tuple[List[int], List[int]] +} + +class model::model::global_functions { + +{method} {static} + get_minimum(c1: np.array, c2: np.array, c3: np.array) -> Tuple[np.array, np.array]: +{method} {static} + generate_fake_dataset() -> pd.DataFrame +{method} {static} + scale_dataset(df: pd.DataFrame, features_conf: Dict[Any, Any], scale_ncss=True) -> pd.DataFrame + +} + +class aibolit::__main__ { + +{method} {static} + list_dir(path, files) +{method} {static} + predict(input_params, model, args) +{method} {static} + run_parse_args(commands_dict) +{method} {static} + train() +{method} {static} + __count_value(value_dict, input_params, code_lines_dict, java_file: str, is_metric=False) +{method} {static} + flatten(l): +{method} {static} + add_pattern_if_ignored(dct: Dict[str, Any], pattern_item: Dict[Any, Any], results_list: List[Any]) -> None +{method} {static} + find_annotation_by_node_type(tree: javalang.tree.CompilationUnit,node_type) -> Dict[Any, Any] +{method} {static} + find_start_and_end_lines(node) -> Tuple[int, int] +{method} {static} + check_max_position(node) +{method} {static} + traverse(node): +{method} {static} + calculate_patterns_and_metrics(file, args): +{method} {static} + inference(input_params: List[int], code_lines_dict, args) +{method} {static} + run_recommend_for_file(file: str, args) +{method} {static} + 
create_xml_tree(results, full_report, cmd, exit_code) +{method} {static} + get_exit_code(results): +{method} {static} + create_text(results, full_report, is_long=False): +{method} {static} + show_summary(buffer, importances_for_all_classes, is_long, results, total_patterns) +{method} {static} + print_total_score_for_file +{method} {static} + check() +{method} {static} + handle_exclude_command_line(args) +{method} {static} + format_converter_for_pattern(results, sorted_by=None) +{method} {static} + version() +{method} {static} + run_thread(files, args) +{method} {static} + get_versions(pkg_name) +{method} {static} + main() + +} +@enduml \ No newline at end of file diff --git a/uml/recommendation_pipeline.png b/uml/recommendation_pipeline.png new file mode 100644 index 00000000..33fbde91 Binary files /dev/null and b/uml/recommendation_pipeline.png differ diff --git a/uml/recommendation_pipeline.puml b/uml/recommendation_pipeline.puml new file mode 100644 index 00000000..3239e9a0 --- /dev/null +++ b/uml/recommendation_pipeline.puml @@ -0,0 +1,120 @@ +@startuml + +skinparam backgroundColor #EEEBDC +skinparam handwritten true + +skinparam sequence { +ArrowColor DeepSkyBlue +ActorBorderColor DeepSkyBlue +LifeLineBorderColor blue +LifeLineBackgroundColor #A9DCDF + +ParticipantBorderColor DeepSkyBlue +ParticipantBackgroundColor DodgerBlue +ParticipantFontName Impact +ParticipantFontSize 17 +ParticipantFontColor #A9DCDF + +ActorBackgroundColor aqua +ActorFontColor DeepSkyBlue +ActorFontSize 17 +ActorFontName Aapex +} + +participant "main" as main +participant "check" as check +participant "handle_exclude_command_line" as handle_exclude_command_line +participant "run_thread" as run_thread +participant "run_recommend_for_file" as run_recommend_for_file +participant "find_annotation_by_node_type" as find_annotation_by_node_type +participant "run_recommend_for_file" as run_recommend_for_file +participant "calculate_patterns_and_metrics" as calculate_patterns_and_metrics +participant "run_recommend_for_file" as run_recommend_for_file +participant "Config" as Config +participant "__count_value" as __count_value +participant "inference" as inference +participant "add_pattern_if_ignored" as add_pattern_if_ignored +participant "create_xml_tree" as create_xml_tree +participant "get_exit_code" as get_exit_code +participant "format_converter_for_pattern" as format_converter_for_pattern +participant "predict" as predict +participant "PatternRankingModel.rank" as PatternRankingModel.rank +participant "get_exit_code" as get_exit_code + +activate main +activate check +main -> check +check -> handle_exclude_command_line: parse cmd for exclude pattern +handle_exclude_command_line -> check: ignored files +check -> check: filter ignored files +||| +check -> run_thread: prepare ThreadPoolExecutor +activate run_thread +run_thread -> run_recommend_for_file: parallel run for each file +activate run_recommend_for_file +run_recommend_for_file -> find_annotation_by_node_type: for class_declaration +find_annotation_by_node_type -> run_recommend_for_file: annotations with class_declaration +run_recommend_for_file -> find_annotation_by_node_type: for field_declaration +find_annotation_by_node_type -> run_recommend_for_file: annotations with field_declaration +run_recommend_for_file -> find_annotation_by_node_type: for method_declaration +find_annotation_by_node_type -> run_recommend_for_file: annotations with method_declaration +run_recommend_for_file -> calculate_patterns_and_metrics +activate calculate_patterns_and_metrics 
+calculate_patterns_and_metrics -> Config: get_patterns_config()
+Config -> calculate_patterns_and_metrics: config with patterns and metrics
+calculate_patterns_and_metrics -> __count_value: pass config
+activate __count_value
+__count_value -> __count_value: run value() of a pattern/metric
+__count_value -> calculate_patterns_and_metrics: input_params, code_lines_dict, error_string
+deactivate __count_value
+deactivate calculate_patterns_and_metrics
+run_recommend_for_file -> inference: input_params, code_lines_dict, args
+activate inference
+inference -> Config: folder_model_data()
+Config -> inference: filename for model loading
+inference -> inference: pickle load PatternRankingModel
+inference -> predict: input_params, model, args
+activate predict
+predict -> predict: get features order from PatternRankingModel
+predict -> PatternRankingModel.rank: input_params
+activate PatternRankingModel.rank
+PatternRankingModel.rank -> PatternRankingModel.rank: predict value with catboost, input_params as input
+PatternRankingModel.rank -> PatternRankingModel.rank: predict decreased value with catboost, input_params as input
+PatternRankingModel.rank -> predict: preds, importances
+deactivate PatternRankingModel.rank
+predict -> inference: sorted_patterns_codes, importances
+deactivate predict
+inference -> inference: calculate aibolit score based on sorted_patterns_codes, importances
+inference -> run_recommend_for_file: results_list
+deactivate inference
+run_recommend_for_file -> add_pattern_if_ignored: input is results_list
+activate add_pattern_if_ignored
+add_pattern_if_ignored -> run_recommend_for_file: output is filtered array
+deactivate add_pattern_if_ignored
+run_recommend_for_file -> run_thread: future
+deactivate run_recommend_for_file
+run_thread -> check: yield future.result()
+deactivate run_thread
+
+check -> get_exit_code: pass results
+get_exit_code -> check: exit code
+
+activate check #DarkSalmon
+check -> create_xml_tree: if args.format == 'xml':
+activate create_xml_tree
+create_xml_tree -> check: xml root
+deactivate create_xml_tree
+check -> check: write xml
+check -> format_converter_for_pattern
+activate format_converter_for_pattern
+format_converter_for_pattern -> check: prepared data structure
+deactivate format_converter_for_pattern
+check -> create_text
+activate create_text
+create_text -> check: ascii text
+deactivate create_text
+deactivate check
+check -> main: exit code
+deactivate check
+deactivate main
+@enduml
\ No newline at end of file
diff --git a/uml/train_pipeline.png b/uml/train_pipeline.png
new file mode 100644
index 00000000..61f8f8a2
Binary files /dev/null and b/uml/train_pipeline.png differ
diff --git a/uml/train_pipeline.puml b/uml/train_pipeline.puml
new file mode 100644
index 00000000..b4a05030
--- /dev/null
+++ b/uml/train_pipeline.puml
@@ -0,0 +1,73 @@
+@startuml
+@startuml
+skinparam backgroundColor #EEEBDC
+skinparam handwritten true
+
+skinparam sequence {
+ArrowColor DeepSkyBlue
+ActorBorderColor DeepSkyBlue
+LifeLineBorderColor blue
+LifeLineBackgroundColor #A9DCDF
+
+ParticipantBorderColor DeepSkyBlue
+ParticipantBackgroundColor DodgerBlue
+ParticipantFontName Impact
+ParticipantFontSize 17
+ParticipantFontColor #A9DCDF
+
+ActorBackgroundColor aqua
+ActorFontColor DeepSkyBlue
+ActorFontSize 17
+ActorFontName Aapex
+}
+
+participant "main" as main
+participant "train" as train
+participant "collect_dataset" as collect_dataset
+participant "model" as model
+participant "test_dataset" as test_dataset
+participant "Config" as Config
+participant "run_cmd" as run_cmd +activate main +activate train +main -> train: if not args.skip_collect_dataset: +train -> collect_dataset +activate collect_dataset +collect_dataset -> run_cmd: external call 02-filter-and-move.py via make + +collect_dataset -> make_patterns +make_patterns -> run_cmd: external call 04-find-patterns.py via make +activate Config +make_patterns -> Config: dataset_file() +Config -> make_patterns: file for saving results of 04-find-patterns.py +deactivate Config + +collect_dataset -> run_cmd: external call 06-calculate-halstead.py via make +collect_dataset -> run_cmd: external call 07-merge.py via make +collect_dataset -> run_cmd: external call 08-split.py via make + +deactivate collect_dataset +||| +main -> train_process +activate train_process +train_process -> train_process: read dataset, made by 08-split.py +train_process -> train_process: scale_dataset +activate model +train_process -> model: fit_regressor() +deactivate model +train_process -> train_process: save model to file +activate model +train_process -> train_process: load model to file +train_process -> train_process: scale dataset, made by 08-split.py +activate train_process #DarkSalmon +train_process -> model: rank +note right : Smoke tests +model -> train_process: preds, importances +deactivate model +train_process -> train_process: remove trash catboost folder +deactivate train_process +train_process -> train +train -> main +deactivate train +deactivate main +@enduml \ No newline at end of file diff --git a/wp/.gitignore b/wp/.gitignore new file mode 100644 index 00000000..f316b5d2 --- /dev/null +++ b/wp/.gitignore @@ -0,0 +1,11 @@ +*.bbl +*.bcf +*.blg +*.fdb_latexmk +_minted-wp/ +*.fls +*.log +*.run.xml +*.aux +*.pyg +wp.pdf diff --git a/wp/.latexmkrc b/wp/.latexmkrc new file mode 100644 index 00000000..28857bc5 --- /dev/null +++ b/wp/.latexmkrc @@ -0,0 +1,2 @@ +$latex = 'latex %O --shell-escape %S'; +$pdflatex = 'pdflatex %O --shell-escape %S'; diff --git a/wp/Makefile b/wp/Makefile new file mode 100644 index 00000000..eaed478d --- /dev/null +++ b/wp/Makefile @@ -0,0 +1,7 @@ +all: wp.pdf + +clean: + rm -rf *.aux *.bbl *.bcf *.blg *.fdb_latexmk *.fls *.log *.run.xml + +%.pdf: %.tex references.bib + latexmk -pdf $< diff --git a/wp/how_it_works_diagram_5.jpg b/wp/how_it_works_diagram_5.jpg new file mode 100644 index 00000000..6d82d145 Binary files /dev/null and b/wp/how_it_works_diagram_5.jpg differ diff --git a/wp/references.bib b/wp/references.bib index 80afb849..457c058c 100644 --- a/wp/references.bib +++ b/wp/references.bib @@ -1,7 +1,5 @@ %================ Related Works Section ================== - % for white paper - @article{10.1145/69605.2085, author = {Basili, Victor R. and Perricone, Barry T.}, title = {Software Errors and Complexity: An Empirical Investigation}, @@ -320,7 +318,8 @@ @misc{Sayyad-Shirabad+Menzies:2005 author = "Sayyad Shirabad, J. 
and Menzies, T.J.", year = "2005", isbn_hidden={null}, -url_hidden={https://ieeexplore.ieee.org/document/5989519},} +url_hidden={https://ieeexplore.ieee.org/document/5989519} +} @inproceedings{Better_cross, author = {Peters, Fayola and Menzies, Tim and Marcus, Andrian}, @@ -386,7 +385,8 @@ @INPROCEEDINGS{7510216 keywords={genetic algorithms;neural nets;software fault tolerance;completeness parameter;recall parameter;error rate;PROMISE data repository;project fault datasets;fault prediction model;software module prediction;software fault prediction;genetic programming;neural network;Software;Programming;Artificial neural networks;Computers;Data mining;Number of Software Faults;Software Fault Prediction;Genrtic Programming;Neural Network}, doi_hidden={10.1109/RAECE.2015.7510216}, issn_hidden={null}, -month={Feb},} +month={Feb}, +} @ARTICLE{1717471, author={ {Yuming Zhou} and {Hareton Leung}}, @@ -399,7 +399,8 @@ @ARTICLE{1717471 keywords={object-oriented programming;regression analysis;software fault tolerance;software metrics;object-oriented design metrics;fault-proneness prediction;object-oriented software system;fault severity;logistic regression method;machine learning method;public domain NASA data set;fault-prone classes;Object oriented modeling;Predictive models;Logistics;Learning systems;NASA;Computer Society;Software systems;Fault detection;Decision making;Programming;Object-oriented;faults;fault-proneness;metrics;prediction;cross validation.}, doi_hidden={10.1109/TSE.2006.102}, issn_hidden={2326-3881}, -month={Oct},} +month={Oct}, +} @article{SHATNAWI20081868, title = "The effectiveness of software metrics in identifying error-prone classes in post-release software evolution process", @@ -427,7 +428,8 @@ @INPROCEEDINGS{6349519 keywords={Bayes methods;learning (artificial intelligence);pattern classification;program debugging;software engineering;support vector machines;bug severity;software bug reporting;software development process;bug tracking system;MLT;self-improving software;supervised machine learning technique;software repositories;k-nearest neighbor;naïve Bayes multinomial;support vector machine;J48;RIPPER in;PROMISE repository;NASA;naïve Bayes classifier;Machine learning;Accuracy;Support vector machines;Software;Classification algorithms;Niobium;Text mining;Machine Learning;Supervised Classification;Feature Selection;Bug Severity}, doi_hidden={10.1109/CONSEG.2012.6349519}, issn_hidden={null}, -month={Sep.},} +month={Sep.}, +} @ARTICLE{1702388, author={T. J. {McCabe}}, @@ -440,7 +442,8 @@ @ARTICLE{1702388 keywords={Basis;complexity measure;control flow;decomposition;graph theory;independence;linear;modularization;programming;reduction;software;testing;Software testing;System testing;Graph theory;Fluid flow measurement;Software measurement;Linear programming;Software engineering;Software systems;Software maintenance;National security;Basis;complexity measure;control flow;decomposition;graph theory;independence;linear;modularization;programming;reduction;software;testing}, doi_hidden={10.1109/TSE.1976.233837}, issn_hidden={2326-3881}, -month={Dec},} +month={Dec}, +} @ARTICLE{979986, author={J. {Bansiya} and C. G. 
{Davis}}, @@ -453,7 +456,8 @@ @ARTICLE{979986 keywords={object-oriented programming;software metrics;software quality;data encapsulation;hierarchical model;object-oriented design quality assessment;high-level design quality attributes;behavioral design properties;object-oriented design metrics;design properties;encapsulation;modularity;coupling;cohesion;high-level quality attributes;reusability;quality attributes;expert opinion;commercial object-oriented systems;quality assessment tool;product metrics;Object oriented modeling;Quality assessment}, doi_hidden={10.1109/32.979986}, issn_hidden={2326-3881}, -month={Jan},} +month={Jan}, +} @article{Incorporating_transitive, author = {Al Dallal, Jehad}, @@ -520,7 +524,8 @@ @INPROCEEDINGS{4301106 keywords={data mining;project management;software fault tolerance;software engineering database;fault module detection;decision making;project manager;data mining algorithm;Software engineering;Fault detection;Data mining;Project management;Engineering management;Databases;Computer science;Decision making;Programming;Software tools}, doi_hidden={10.1109/EUROMICRO.2007.20}, issn_hidden={2376-9505}, -month={Aug},} +month={Aug}, +} @inproceedings{101049, author = {Gray, D. and Bowes, David and Davey, N. and Sun, Yi and Christianson, Bruce}, @@ -553,7 +558,8 @@ @INPROCEEDINGS{8952192 keywords={Class overlap;Software defect prediction;K Means clustering;Machine learning}, doi_hidden={10.1109/ASE.2019.00071}, issn_hidden={1938-4300}, -month={Nov},} +month={Nov}, +} @INPROCEEDINGS{8777507, author={S. {Agarwal} and S. {Gupta} and R. {Aggarwal} and S. {Maheshwari} and L. {Goel} and S. {Gupta}}, @@ -563,10 +569,10 @@ @INPROCEEDINGS{8777507 volume={}, number={}, pages={1-6}, -keywords={Bayes methods;decision trees;program testing;public domain software;software engineering;statistical analysis;software defect prediction;statistical learning;focused testing;chi-square score function;naïve Bayes;decision tree;k nearest neighbors;gradient boosting;random forest;feature selection;select k best algorithm;software development;open source software projects;statistical model;predictive performance;testing cost;Software;Feature extraction;Decision trees;Testing;Computer bugs;Classification algorithms;Prediction algorithms;machine learning;defect prediction;classification;Select K-Best}, doi_hidden={10.1109/IoT-SIU.2019.8777507}, issn_hidden={null}, -month={April},} +month={April} +} @INPROCEEDINGS{1544801, author={V. U. B. {Challagulla} and F. B. {Bastani} and {I-Ling Yen} and R. A. 
{Paul}}, @@ -579,7 +585,8 @@ @INPROCEEDINGS{1544801 keywords={software quality;safety-critical software;learning (artificial intelligence);regression analysis;belief networks;decision trees;software performance evaluation;machine learning;software defect prediction;real-time software systems;telecontrol systems;robotic systems;mission planning systems;dynamic code synthesis;runtime mission-specific requirements;dynamic dependability assessment;statistical models;stepwise multilinear regression models;multivariate models;artificial neural networks;instance-based reasoning;Bayesian-belief networks;decision trees;rule inductions;software quality;predictor model;consistency-based subset evaluation;telepresence systems;Machine learning;Predictive models;Real time systems;Software systems;Robots;Runtime;Network synthesis;Regression tree analysis;Artificial neural networks;Bayesian methods}, doi_hidden={10.1109/WORDS.2005.32}, issn_hidden={2378-573X}, -month={Feb},} +month={Feb}, +} @inproceedings{10.1145/1137983.1138012, author = {Knab, Patrick and Pinzger, Martin and Bernstein, Abraham}, @@ -609,7 +616,8 @@ @ARTICLE{4027145 keywords={data mining;learning (artificial intelligence);program diagnostics;program testing;software quality;data mining;static code attributes;defect predictor learning;McCabes versus Halstead;lines of code counts;resource-bound exploration;Data mining;Bayesian methods;Artificial intelligence;Software testing;System testing;Learning systems;Art;Software quality;Software systems;Financial management;Data mining detect prediction;McCabe;Halstead;artifical intelligence;empirical;naive Bayes.}, doi_hidden={10.1109/TSE.2007.256941}, issn_hidden={2326-3881}, -month={Jan},} +month={Jan}, +} @INPROCEEDINGS{7476673, author={C. {Macho} and S. {McIntosh} and M. 
{Pinzger}}, @@ -622,7 +630,8 @@ @INPROCEEDINGS{7476673 keywords={Java;random processes;sampling methods;software maintenance;source code (software);source code;software maintenance;Java open source project;random forest classifier;resampling technique;SMOTE;Predictive models;Java;Training;Data mining;Data models;Computational modeling;Maintenance engineering;Build Co-Change Prediction;Software Evolution;Fine-Grained Source Code Changes}, doi_hidden={10.1109/SANER.2016.22}, issn_hidden={null}, -month={March},} +month={March}, +} @inproceedings{10.1145/2786805.2786813, author = {Jing, Xiaoyuan and Wu, Fei and Dong, Xiwei and Qi, Fumin and Xu, Baowen}, @@ -699,7 +708,8 @@ @ARTICLE{1033229 keywords={neural nets;risk management;principal component analysis;pattern recognition;statistics;software engineering;enhanced neural network technique;software risk analysis;risk categorization;pattern recognition;multivariate statistics;principal component analysis;input data normalization;multicollinearity;cross-normalization;high-risk software modules;Neural networks;Risk analysis;Government;Costs;Principal component analysis;Mathematical model;Predictive models;Programming;Contracts;Pattern recognition}, doi_hidden={10.1109/TSE.2002.1033229}, issn_hidden={2326-3881}, -month={Sep.},} +month={Sep.}, +} @article{semi_supervised_learning, author = {Eng, Softw and Li, Ming and Zhang, Hongyu and wu, Rongxin and Zhou, Zhi-Hua and Zhou, Z.-H and Eng, Autom}, @@ -723,7 +733,8 @@ @INPROCEEDINGS{7965301 keywords={learning (artificial intelligence);program diagnostics;semisupervised software defect prediction problems;CPDP methods;within-project semisupervised defect prediction;WSDP methods;cross-project semisupervised defect prediction;CSDP methods;machine learning;semisupervised structured dictionary learning;SSDL;Dictionaries;Software engineering;Software;Predictive models;Data models;Training;Data mining;cross-project semi-supervised defect prediction;within-project semi-supervised defect prediction;semi-supervised structured dictionary learning}, doi_hidden={10.1109/ICSE-C.2017.72}, issn_hidden={null}, -month={May},} +month={May}, +} @Article{Zhang2017, author="Zhang, Zhi-Wu @@ -806,7 +817,8 @@ @INPROCEEDINGS{1383136 keywords={software fault tolerance;decision trees;software quality;software metrics;fault prone module;software development process;large scale system;random forests;decision tree learning;NASA data sets;logistic regression;discriminant analysis;machine learning;software packages;Robustness;Decision trees;Programming;Fault detection;Fault diagnosis;Predictive models;Large-scale systems;Training data;Voting;NASA}, doi_hidden={10.1109/ISSRE.2004.35}, issn_hidden={1071-9458}, -month={Nov},} +month={Nov}, +} @INBOOK{5989519, author={D. {Huizinga} and A. 
{Kolawa}}, @@ -821,7 +833,8 @@ @INBOOK{5989519 issn_hidden={null}, publisher={IEEE}, isbn_hidden={null}, -url_hidden={https://ieeexplore.ieee.org/document/5989519},} +url_hidden={https://ieeexplore.ieee.org/document/5989519}, +} @misc{vasic2019neural, title={Neural Program Repair by Jointly Learning to Localize and Repair}, @@ -886,7 +899,8 @@ @INPROCEEDINGS{5609530 keywords={program debugging;software metrics;software packages;software quality;common bug prediction finding;effort aware model;software quality assurance;bug prediction literature;process metrics;package level prediction;Measurement;Predictive models;Computer bugs;Radio frequency;Mathematical model;Regression tree analysis;Computational modeling}, doi_hidden={10.1109/ICSM.2010.5609530}, issn_hidden={1063-6773}, -month={Sep.},} +month={Sep.}, +} @article{10.1007/s10462-017-9563-5, author = {Rathore, Santosh S. and Kumar, Sandeep}, @@ -918,7 +932,8 @@ @ARTICLE{859533 keywords={software maintenance;software metrics;software fault tolerance;management of change;fault incidence;software change history;change management data;change history;fault potential;code decay;metrics;statistical analysis;History;Predictive models;Software systems;Aging;Time measurement;Software measurement;Length measurement;Software development management;Statistical analysis;Degradation}, doi_hidden={10.1109/32.859533}, issn_hidden={2326-3881}, -month={July},} +month={July}, +} @Article{Turhan2009, author="Turhan, Burak @@ -958,7 +973,8 @@ @INPROCEEDINGS{8616596 keywords={computer science education;learning (artificial intelligence);program compilers;program debugging;program diagnostics;recurrent neural nets;syntax errors;logical errors;bug detection methods;feedback system;online judge system;programming problems;accumulates numerous lines;solution source code;short-term memory networks;LSTM networks;bugs;solution codes;IDEs support;Computer bugs;Training;Programming profession;Probability;Debugging}, doi_hidden={10.1109/SMC.2018.00599}, issn_hidden={1062-922X}, -month={Oct},} +month={Oct}, +} @inproceedings{Hinton2001UnsupervisedLF, title={Unsupervised Learning: Foundations of Neural Computation--A Review}, @@ -1228,11 +1244,359 @@ @article{10.1023/A:1010933404324 volume = {45}, number = {1}, issn = {0885-6125}, -url = {https://doi.org/10.1023/A:1010933404324}, -doi = {10.1023/A:1010933404324}, journal = {Mach. Learn.}, month = oct, pages = {5–32}, numpages = {28}, keywords = {ensemble, classification, regression} } + +@inproceedings{10.1145/3194164.3194186, +author = {Campbell, G. Ann}, +title = {Cognitive Complexity: An Overview and Evaluation}, +year = {2018}, +isbn = {9781450357135}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +booktitle = {Proceedings of the 2018 International Conference on Technical Debt}, +pages = {57–58}, +numpages = {2}, +location = {Gothenburg, Sweden}, +series = {TechDebt ’18} +} + +@article{10.1145/3133925, +author = {Chae, Kwonsoo and Oh, Hakjoo and Heo, Kihong and Yang, Hongseok}, +title = {Automatically Generating Features for Learning Program Analysis Heuristics for C-like Languages}, +year = {2017}, +issue_date = {October 2017}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {1}, +number = {OOPSLA}, +journal = {Proc. ACM Program. 
Lang.}, +month = oct, +articleno = {101}, +numpages = {25}, +keywords = {Automatic feature generation, Data-driven program analysis} +} + +@inproceedings{Learning_a_Strategy_for_Adapting, +author = {Oh, Hakjoo and Yang, Hongseok and Yi, Kwangkeun}, +title = {Learning a Strategy for Adapting a Program Analysis via Bayesian Optimisation}, +year = {2015}, +isbn = {9781450336895}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +booktitle = {Proceedings of the 2015 ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications}, +pages = {572–588}, +numpages = {17}, +keywords = {Bayesian Optimization, rogram Analysis}, +location = {Pittsburgh, PA, USA}, +series = {OOPSLA 2015} +} + +@ARTICLE{Software_quality_the_elusive_target, + author={B. {Kitchenham} and S. L. {Pfleeger}}, + journal={IEEE Software}, + title={Software quality: the elusive target [special issues section]}, + year={1996}, + volume={13}, + number={1}, + pages={12-21}, +} + +@BOOK{The_Economics_of_Software_Quality, + TITLE = {The Economics of Software Quality}, + AUTHOR = {Jones, Capers and Oliver, Bonsignour}, + YEAR = {2012}, + PUBLISHER = {Addison-Wisley}, +} + +@inproceedings{static_code_analysis_for_security, +author = {Kulenovic, Melina and Donko, Dzenana}, +year = {2014}, +month = {05}, +pages = {}, +title = {A survey of static code analysis methods for security vulnerabilities detection}, +} + +@inproceedings{Ilyas2016StaticCA, + title={Static Code Analysis: A Systematic Literature Review and an Industrial Survey}, + author={Bilal Ilyas and Islam Elkhalifa}, + year={2016} +} + +@inproceedings{cognitive_complexity, +author = {Campbell, G. Ann}, +title = {Cognitive Complexity: An Overview and Evaluation}, +year = {2018}, +isbn = {9781450357135}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +booktitle = {Proceedings of the 2018 International Conference on Technical Debt}, +pages = {57–58}, +numpages = {2}, +location = {Gothenburg, Sweden}, +series = {TechDebt ’18} +} + +@article{Software_metric_for_Java, +author = {Balmas, Françoise and Bergel, Alexandre and Denier, Simon and Ducasse, Stéphane and Laval, Jannik and Mordal-Manet, Karine and Abdeen, Hani and Bellingard, Fabrice}, +year = {2010}, +month = {03}, +pages = {}, +title = {Software metric for Java and C++ practices (Workpackage 1.1)} +} + +@article{Code_Smells_and_Software_Quality, +author = {Kaur, Amandeep}, +year = {2019}, +month = {07}, +pages = {}, +title = {A Systematic Literature Review on Empirical Analysis of the Relationship Between Code Smells and Software Quality Attributes}, +journal = {Archives of Computational Methods in Engineering}, +} + +@inproceedings{Overview_Static_Code_Analysis_in_Software_Development, + title={An overview on the Static Code Analysis approach in Software Development}, + author={Ivo Vieira Gomes and Pedro Morgado and Tiago Gomes and Rodrigo Bossini Tavares Moreira}, + year={2009}, + booktitle={ } +} + +@inproceedings{Youness2013ComparativeSO, + title={Comparative Study of Software Quality Models}, + author={Boukouchi Youness and M. Abdel-Aziz and BENLAHMER Habib and Moutachaouik Hicham}, + year={2013} +} + +@article{Chidamber1994AMS, + title={A Metrics Suite for Object Oriented Design}, + author={Shyam R. Chidamber and Chris F. Kemerer}, + journal={IEEE Trans. Software Eng.}, + year={1994}, + volume={20}, + pages={476-493} +} + +@article{Iacob_Constantinescu, + author = {Iacob, I. M. 
and Constantinescu, R.}, + title = {Testing: First Step Towards Software Quality}, + year = {2008}, + publisher = {Journal of Applied Quantitative Methods}, + address = {}, + booktitle = {}, + pages = {}, + numpages = {}, + location = {}, + series = {} +} + +@article{Liu_Kane, + author = {Liu, X. F. and Kane, G. and Bambroo, M.}, + title = {An intelligent early warning system for software quality improvement and project management}, + year = {2006}, + publisher = {Journal of Systems and Software}, + address = {}, + booktitle = {}, + pages = {}, + numpages = {}, + location = {}, + series = {} +} + +@article{Liu2006AnIE, + title={An intelligent early warning system for software quality improvement and project management}, + author={Xiaoqing Frank Liu and Gautam Kane and Monu Bambroo}, + journal={J. Syst. Softw.}, + year={2006}, + volume={79}, + pages={1552-1564} +} + +@article{Farhan, + author = {Farhan Alebebisat and Zaid Alhalhouli and Tamara E Alshabatat}, + title = {Review of Literature on Software Quality}, + year = {2018}, + address = {}, + booktitle = {}, + pages = {}, + numpages = {}, + location = {}, + series = {} +} + +@inproceedings{Mohammadi2013AnAO, + title={An Analysis of Software Quality Attributes and Their Contribution to Trustworthiness}, + author={Nazila Gol Mohammadi and Sachar Paulus and Mohamed Bishr and Andreas Metzger and Holger K{\"o}nnecke and Sandro Hartenstein and Klaus Pohl}, + booktitle={CLOSER}, + year={2013} +} + +@article{McCabe1976ACM, + title={A Complexity Measure}, + author={Thomas J. McCabe}, + journal={IEEE Transactions on Software Engineering}, + year={1976}, + volume={SE-2}, + pages={308-320} +} + +@article{Cognitive, + author = { G. Ann Campbell}, + title = {COGNITIVE COMPLEXITY. A new way of measuring understandability}, + year = {2018}, + address = {}, + booktitle = {}, + pages = {}, + url_hidden = {https://www.sonarsource.com/docs/CognitiveComplexity.pdf}, + numpages = {}, + location = {}, + series = {} +} + +@inproceedings{Fil2015ACO, + title={A Catalogue of Thresholds for Object-Oriented Software Metrics}, + author={Tarc{\'i}sio G. S. Fil{\'o} and Mariza Andrade da Silva Bigonha}, + year={2015} +} + +@article{Shatnawi2010FindingSM, + title={Finding software metrics threshold values using ROC curves}, + author={Raed Shatnawi and Wei Li and James Swain and Tim Newman}, + journal={J. Softw. Maintenance Res. Pract.}, + year={2010}, + volume={22}, + pages={1-16} +} + +@article{Ouni2011MaintainabilityDD, + title={Maintainability defects detection and correction: a multi-objective approach}, + author={Ali Ouni and Marouane Kessentini and Houari A. Sahraoui and Mounir Boukadoum}, + journal={Automated Software Engineering}, + year={2011}, + volume={20}, + pages={47-79} +} + +@inproceedings{Fowler1999RefactoringI, + title={Refactoring - Improving the Design of Existing Code}, + author={Martin Fowler}, + booktitle={Addison Wesley object technology series}, + year={1999} +} + +@article{Din2012AntipatternsDA, + title={Antipatterns detection approaches in Object-Oriented Design: A literature review}, + author={Jamilah Din and Anas Bassam Al-Badareen and Yah Yusmadi Jusoh}, + journal={2012 7th International Conference on Computing and Convergence Technology (ICCCT)}, + year={2012}, + pages={926-931} +} + +@article{Kreimer2005AdaptiveDO, + title={Adaptive Detection of Design Flaws}, + author={Jochen Kreimer}, + journal={Electron. Notes Theor. Comput. 
Sci.}, + year={2005}, + volume={141}, + pages={117-136} +} + +@article{Palomba2015MiningVH, + title={Mining Version Histories for Detecting Code Smells}, + author={Fabio Palomba and Gabriele Bavota and Massimiliano Di Penta and Rocco Oliveto and Denys Poshyvanyk and Andrea De Lucia}, + journal={IEEE Transactions on Software Engineering}, + year={2015}, + volume={41}, + pages={462-489} +} + +@article{Liu2019DeepLB, + title={Deep Learning Based Code Smell Detection}, + author={Hui Liu and Jiahao Jin and Zhifeng Xu and Yifan Bu and Yanzhen Zou and Lu Zhang}, + journal={IEEE Transactions on Software Engineering}, + year={2019}, + pages={1-1} +} + +@article{Emden2002JavaQA, + title={Java quality assurance by detecting code smells}, + author={Eva Van Emden and Leon Moonen}, + journal={Ninth Working Conference on Reverse Engineering, 2002. Proceedings.}, + year={2002}, + pages={97-106} +} + +@article{Reeshti2019MeasuringCS, + title={Measuring Code Smells and Anti-Patterns}, + author={Reeshti and Rajni Sehgal and Renuka Nagpal and Deepti Mehrotra}, + journal={2019 4th International Conference on Information Systems and Computer Networks (ISCON)}, + year={2019}, + pages={311-314} +} + +@inproceedings{Friedman2001GreedyFA, + title={Greedy function approximation: A gradient boosting machine.}, + author={Jerome H. Friedman}, + year={2001} +} + +@article{Dorogush2018CatBoostGB, + title={CatBoost: gradient boosting with categorical features support}, + author={Anna Veronika Dorogush and Vasily Ershov and Andrey Gulin}, + journal={ArXiv}, + year={2018}, + volume={abs/1810.11363} +} + +@inproceedings{Kessentini2019UnderstandingTC, + title={Understanding the Correlation between Code Smells And Software Bugs}, + author={Marouane Kessentini}, + year={2019} +} + +@inproceedings{MurphyHill2010AnIA, + title={An interactive ambient visualization for code smells}, + author={Emerson R. Murphy-Hill and Andrew P. 
Black}, + booktitle={SOFTVIS '10}, + year={2010} +} + +@article{Vaucher2009TrackingDS, + title={Tracking Design Smells: Lessons from a Study of God Classes}, + author={St{\'e}phane Vaucher and Foutse Khomh and Naouel Moha and Yann-Ga{\"e}l Gu{\'e}h{\'e}neuc}, + journal={2009 16th Working Conference on Reverse Engineering}, + year={2009}, + pages={145-154} +} + +@inproceedings{Broy2006DemystifyingM, + title={Demystifying maintainability}, + author={Manfred Broy and Florian Dei{\ss}enb{\"o}ck and Markus Pizka}, + booktitle={WoSQ '06}, + year={2006} +} + +@book{gamma1995design, + title={Design patterns: elements of reusable object-oriented software}, + author={Gamma, Erich}, + year={1995}, + publisher={Pearson Education India} +} + +@book{mcconnell2004code, + title={Code complete}, + author={McConnell, Steve}, + year={2004}, + publisher={Pearson Education} +} + +@inproceedings{LenatFeigenbaum1987, +author = {Lenat, Douglas and Feigenbaum, Edward}, +year = {1987}, +month = {01}, +pages = {1173-1182}, +title = {On the Thresholds of Knowledge.}, +} diff --git a/wp/sections/appendix.tex b/wp/sections/appendix.tex new file mode 100644 index 00000000..2b821497 --- /dev/null +++ b/wp/sections/appendix.tex @@ -0,0 +1,425 @@ +\subsection*{Patterns Dictionary} + +\begin{itemize} + \item \pattern{Assert in code}{P1} + {If there is an assert statement in code block, and name of class doesn't end with Test, it is considered a pattern.} + {\it Example:} +\begin{lstlisting}[language=Java] +class Book { + void foo(String x) { + assert x != null; // here +} +\end{lstlisting} + + \item \pattern{Setter}{P2}{The method's name starts with set, then goes the name of the attribute. There are attributes assigning in the method. Also, asserts are ignored.} + {\it Examples:} +\begin{lstlisting}[language=Java] +class Book { + private String title; + void setTitle(String) { + this.title = t; + } +} +\end{lstlisting} + +\begin{lstlisting}[language=Java] +class Book { + private String title; + public void setIsDiscrete() { + assert !isDiscrete; + assert !x; //ignore it + this.isDiscrete = isDiscrete; + } +} +\end{lstlisting} + +\begin{lstlisting}[language=Java] +class Book { +private String isDiscrete; + + public void setIsDiscrete(String isDiscretem, boolean x) { + assert !isDiscrete; + assert !x; //ignore it + this.isDiscrete = isDiscrete; + } +} +\end{lstlisting} + +\begin{lstlisting}[language=Java] +class Book { + private String title; + + @Override + synchronized public void setConf(Configuration conf) { + this.conf = conf; + this.randomDevPath = conf.get( + HADOOP_SECURITY_SECURE_RANDOM_DEVICE_FILE_PATH_KEY, + HADOOP_SECURITY_SECURE_RANDOM_DEVICE_FILE_PATH_DEFAULT); + close(); \\ some minor changes also do not affect, it is still Setter pattern + } +} +\end{lstlisting} + + \item \pattern{Empty Rethrow}{P3}{We throw the same exception as it was caught.} + {\it Example:} +\begin{lstlisting}[language=Java] +class Book { + void foo() { + try { + File.readAllBytes(); + } catch (IOException e) { + // maybe something else here + throw e; // here! 
+ } + } +} +\end{lstlisting} + + \item \pattern{ErClass}{P4}{ If a class name is one of the following (or ends with this word), it's the pattern: + + Manager, Controller, Router, Dispatcher, Printer, Writer, Reader, Parser, Generator, Renderer, Listener, Producer, Holder, Interceptor.} + + \item \pattern{Force type casting}{P5}{The force type casting considered as a pattern.} + {\it Example:} +\begin{lstlisting}[language=Java] +// casting to int is +public int square (int n) { + return (int) java.lang.Math.pow(n,2); +} +\end{lstlisting} + + \item \pattern{If return if detection}{P6}{If there is a return in if condition, it's a pattern.} + {\it Example:} + \begin{lstlisting}[language=Java] +class T1 { + public void main(int x) { + if (x < 0) { + return; + } else { + System.out.println("X is positive or zero"); + } + } +} +\end{lstlisting} + + \item \pattern{Implements Multi}{P7}{If a class implements more than 1 interface it's a pattern.} + {\it Examples:} + \begin{lstlisting}[language=Java] +public class AnimatableSplitDimensionPathValue implements AnimatableValue { + private final AnimatableFloatValue animatableXDimension; + private final AnimatableFloatValue animatableYDimension; + + public AnimatableSplitDimensionPathValue( + AnimatableFloatValue animatableXDimension, + AnimatableFloatValue animatableYDimension) { + this.animatableXDimension = animatableXDimension; + this.animatableYDimension = animatableYDimension; + } +} +\end{lstlisting} +\begin{lstlisting}[language=Java] +public class a implements A, B { +} +\end{lstlisting} + + \item \pattern{Using \texttt{instanceof} operator}{P8}{ Using of \texttt{instanceof} operator considered as pattern.} +{\it Examples:} + \begin{lstlisting}[language=Java] +public static void main(String[] args) { + Child obj = new Child(); + if (obj instanceof String) + System.out.println("obj is instance of Child"); +} +\end{lstlisting} +\begin{lstlisting}[language=Java] +class Test +{ + public static void main(String[] args) + { + Child cobj = new Child(); + System.out.println(b.getClass().isInstance(c)); + } +} +\end{lstlisting} + + \item \pattern{Many primary ctors}{P9}{If there is more than one primary constructors in a class, it is considered a pattern.} + {\it Example:} +\begin{lstlisting}[language=Java] +class Book { + + private final int a; + Book(int x) { // first primary ctor + this.a = x; + } + Book() { // second + this.a = 0; + } +} +\end{lstlisting} + + \item \pattern{Usage of method chaining more than one time}{P10}{If we use more than one method chaining invocation.} +{\it Example:} +\begin{lstlisting}[language=Java] +// here we use method chaining 4 times +public void start() { + MyObject.Start() + .SpecifySomeParameter() + .SpecifySomeOtherParameter() + .Execute(); +} +\end{lstlisting} + + \item \pattern{Multiple Try}{P11}{Once we see more than one try in a single method, it's a pattern.} +{\it Example:} +\begin{lstlisting}[language=Java] +class Foo { + void bar() { + try { + // some code + } catch (IOException ex) { + // do something + } + // some other code + try { // here! 
+ // some code
+ } catch (IOException ex) {
+ // do something
+ }
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Non final attributes}{P12}{Once we see a mutable attribute (without final modifier), it's considered a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ private int id;
+ // something else
+}
+\end{lstlisting}
+
+ \item \pattern{Null checks}{P13}{If we check that something equals (or not equals) null (except in a constructor), it is considered a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ private String z;
+ void x() {
+ if (this.z == null) { // here!
+ throw new RuntimeException("oops");
+ }
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Partial synchronized}{P14}{Here, the synchronized block doesn't include all statements of the method. Something stays out of the block.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ private int a;
+ void foo() {
+ synchronized (this.a) {
+ this.a = 2;
+ }
+ this.a = 1; // here!
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Redundant catch}{P15}{Here, the method \texttt{foo()} throws IOException, but we catch it inside the method.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ void foo() throws IOException {
+ try {
+ Files.readAllBytes();
+ } catch (IOException e) { // here
+ // do something
+ }
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Return null}{P16}{When we return null, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ String foo() {
+ return null;
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{String concatenation using \texttt{+} operator}{P17}{Any usage of string concatenation with the \texttt{+} operator is considered a pattern match.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+public void start() {
+ // this line matches the pattern
+ System.out.println("test" + str1 + "34234" + str2);
+ list = new ArrayList<>();
+ for (int i = 0; i < 10; i++)
+ list.add(Boolean.FALSE);
+}
+\end{lstlisting}
+
+ \item \pattern{Override method calls parent method}{P18}{If we call the parent method from an overridden method, it is considered the pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+@Override
+public void method1() {
+ System.out.println("subclass method1");
+ super.method1();
+}
+\end{lstlisting}
+
+ \item \pattern{Class constructor except \texttt{this} contains other code}{P19}{The first constructor has this() and some other statements. This is the ``hybrid constructor'' pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ private int id;
+ Book() {
+ this(1);
+ int a = 1; // here
+ }
+ Book(int i) {
+ this.id = i;
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Line distance between variable declaration and first usage greater than threshold}{P20\_5, P20\_7, P20\_11}{If the line distance between a variable declaration and its first usage exceeds some threshold, we consider it the pattern. Only non-empty lines are counted. P20\_5 means that the distance is 5.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+// variable a is declared and first used with a distance of 2 lines
+static void myMethod() {
+ String path1 = "/tmp/test1";
+ int a = 4;
+
+ String path2 = "/tmp/test2";
+ String path3 = "/tmp/test3";
+ a = a + 4;
+}
+\end{lstlisting}
+
+ \item \pattern{Variable is declared in the middle of the method body}{P21}{All variables should be declared at the beginning of their scope. If a variable is declared inside the scope after other logical blocks, we consider it the pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+// The declaration of variable list matches the pattern.
+static void myMethod2() {
+ int b = 4;
+ b = b + 6;
+ List list = new ArrayList();
+}
+\end{lstlisting}
+
+ \item \pattern{Array as argument}{P22}{If we pass an array as an argument, it's a pattern. It's better to use objects instead of arrays.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ void bar(int[] x) {
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Joined Validation}{P23}{Once you see a validation (if with a single throw inside) and its condition contains more than one condition joined with OR -- it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ void print(int x, int y) {
+ if (x == 1 || y == 1) { // here!
+ throw new Exception("Oops");
+ }
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Class declaration must always be \texttt{final}}{P24}{Once you see a non-\texttt{final} class, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ private static void foo() {
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Private static method}{P25}{Once you see a \texttt{private static} method, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ private static void foo() {
+ //something
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Public static method}{P26}{Once you see a \texttt{public static} method, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ public static void foo() {
+ //something
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Var siblings}{P27}{Here fileSize and fileDate are ``siblings'' because they both have file as first part of their compound names. It's better to rename them to size and date.\\
+ file and fileSize are NOT siblings.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ void bar() {
+ int fileSize = 10;
+ Date fileDate = new Date();
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Assign null}{P28}{Once we see \texttt{= null}, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ void bar() {
+ String a = null; // here
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Multiple \texttt{while} pattern}{P29}{Once you see two or more \texttt{while} statements in a method body, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Book {
+ void foo() {
+ while (true) {
+ }
+ // something
+ while (true) {
+ }
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Protected method}{P30}{Once we find a protected method in a class, it's a pattern.}
+
+ \item \pattern{Send \texttt{null}}{P31}{Once we see that \texttt{null} is being given as an argument to some method, it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ void bar() {
+ FileUtils.doIt(null); // here
+ }
+}
+\end{lstlisting}
+
+ \item \pattern{Nested loop}{P32}{Once we find a loop (\texttt{for} / \texttt{while}) inside another loop it's a pattern.}
+{\it Example:}
+\begin{lstlisting}[language=Java]
+class Foo {
+ void foo() {
+ while (true) {
+ for (;;) { // here
+ }
+ }
+ }
+}
+\end{lstlisting}
+
+\end{itemize}
\ No newline at end of file
diff --git a/wp/sections/available_data.tex b/wp/sections/available_data.tex
deleted file mode 100644
index 7f1eb922..00000000
--- a/wp/sections/available_data.tex
+++ /dev/null
@@ -1,36 +0,0 @@
-We considered available data in some open sources
-related to the defect detection problem. Some datasets
-have only code samples and labels, while others have
-calculated some code metrics.
-
-ETH JavaScript Dataset\footnote{\url{https://www.sri.inf.ethz.ch/js150}}
-includes 150,000 JavaScript code samples represented as AST trees.
-The dataset has been used in a binary classification task.
-Target positive examples were extracted from the code
-and negative examples were generated using
-Negative Sampling techique. The dataset
-was used by~\citet{DeepBugs} in an attempt to find the following defects:
-swapped arguments, wrong binary operator, and wrong binary operand.
-
-NASA Dataset\footnote{\url{http://promise.site.uottawa.ca/SERepository/datasets-page.html}}
-includes 20 small-sized datasets (20 to 2000 samples in each one) of NASA projects.
-It does not contain code itself but only code metrics. The data
-was previously used for defect prediction, cost estimation, successful reuse, and requirements tracing.
-
-Defects4j Dataset\footnote{\url{https://github.com/rjust/defects4j/tree/additional-projects-4}}
-includes code samples from different projects,
-where each one has 52 defects of different
-type. The datasets was built by scrapping bug reports and commits
-that fixed them. This dataset does not have labels.
-
-BugLocalization Dataset:\footnote{\url{https://github.com/yanxiao6/BugLocalization-dataset/tree/master/dataset}}
-has 18,500 Java defect report samples from five GitHub-hosted projects.
-Each sample consists of defect description, a commit to fix the defect
-and a link to GitHub source code file.
-
-CodeSearchNet dataset:\footnote{\url{https://github.com/github/CodeSearchNet}}
-is a large dataset of over two million rows of code
-collected from open source libraries. The dataset contains
-Python, Javascript, Ruby, Go, Java, and PHP languages.
-Each sample consists of a function code and a comment related
-to the function. The dataset is built for a semantic code search task.
diff --git a/wp/sections/conclusion.tex b/wp/sections/conclusion.tex
new file mode 100644
index 00000000..e9b59f5a
--- /dev/null
+++ b/wp/sections/conclusion.tex
@@ -0,0 +1,28 @@
+Aibolit is a recommender system that helps improve the quality of Java classes.
+The recommendations are learned from OSS Java projects using ML methods.
+Aibolit provides ranked recommendations for each specific Java class,
+which distinguishes Aibolit from other style checkers and makes it unique.
+
+Aibolit is an extensible system, allowing anyone to add new patterns and to
+increase the training dataset and thus improve the precision and usefulness
+of recommendations. Aibolit can also be used as a framework for analysis of
+patterns and to decide whether any pattern, however subjective it is, is an anti- or a pro-pattern
+with respect to a particular quality metric. As a complementary result,
+we contribute a 100K+ dataset of patterns and metrics calculated for Java classes.
+
+The first version of Aibolit is relatively simple and there is room
+for improvement. If an anti-pattern is found, we recommend fixing all instances
+of the pattern in the code. Instead, we could consider each specific occurrence of the pattern.
+We could exploit its relative position in the structure of the code, rather than just counting
+the frequency. Moreover, Aibolit inspects each Java class independently, but
+we might consider the relations between classes in the future. Furthermore,
+Aibolit's prediction model relies on patterns only. In order to improve the model,
+we have to think about additional features, for example, information about
+the project domain or the frameworks used.
+
+Aibolit is a firm step toward the next generation of tools to control
+and improve software quality. It is a complementary tool for
+product owners who already use tools to manage software quality.
+
+
+
diff --git a/wp/sections/considered.tex b/wp/sections/considered.tex
deleted file mode 100644
index 087aad36..00000000
--- a/wp/sections/considered.tex
+++ /dev/null
@@ -1,126 +0,0 @@
-Even though detecting defects is an important task in any software project,
-existing studies differ in their definition of the parameters and objectives
-of this task,
-partially due to the ambiguity of the definition of the software defect.
-Among others, there are many methods of defect detection, which utilize ML.
-Below we classify them from a few different perspectives,
-in order to justify the selection made later in this document.
-
-\subsection{By the Origin of Data}
-
-As suggested by~\citet{10.1007/s10462-017-9563-5}
-the model can be built using the data
-\begin{enumerate*}[label=\arabic*)]
-\item from the same release of a software module (\emph{intra-release}),
-\item from a few different releases of the same module (\emph{inter-release}), or
-\item from different software modules (\emph{cross-project prediction}).
-\end{enumerate*}
-It seems that this classification was not possible earlier,
-when analytical data was kept proprietary and was not available
-for the community of researchers, only until NASA released a large
-evidential dataset in 2004~\citep{Sayyad-Shirabad+Menzies:2005}.
-
-It it interesting to mention that
-in early 2000s most authors assumed that ``distributions'' in both
-training and testing datasets were similar~\citep{Turhan2009} (\emph{intra-release}), which
-mislead them because the assumption was wrong.
- -Later,~\citet{5609530} introduced ``ensemble techniques,'' which made the analysis -possible in situations when software modules do not have similar -distributions in their training and testing datasets (\emph{inter-release}). - -Recently,~\citet{Better_cross} demonstrated that defect detection may work across -different software modules, using their historical data (\emph{cross-project release}). - -\subsection{By the Output} - -ML-based defect detection methods can be classified by the type of the output they produce: - -First, the output could be \emph{binary}, which means that -software modules (like classes, methods, packages, etc.) are classified -either as faulty or non-faulty ones, as done by~\citet{Gokhale97regressiontree, Menzies04assessingpredictors}. - -Second, \emph{defect density} or the \emph{number of defects} -may be reported per module, demonstrating the degree of their defectiveness, -as suggested by~\citet{Predicting_Fault, JANES20063711, 7510216}. - -Third, \emph{defect severity prediction} may be reported per module, demonstrating -the impact of defects found in the module to the end-user experience, -as suggested by~\citet{7510216, SHATNAWI20081868, 1717471}. - -Fourth, the output could contain information about a specific location -of the defect inside the source code (the line of code with a defect), -as suggested by~\citet{vasic2019neural}. - -\subsection{By the Input} - -\textbf{Software metrics} can be used as an input. -They can be obtained from the source code, -and can belong to one of the following categories: - -\begin{itemize} -\item \emph{Product metrics} are generally used to check -whether software follows~\citep{InternationalStandardOrganization}, -as it was mentioned by~\citet{10.1007/s10462-017-9563-5}, -and can be classified as suggested by~\citet{10.5555/540137}: -\begin{enumerate*}[label=\arabic*)] -\item \emph{Traditional} metrics may include size, -system complexity~\citep{1702388}, and others; -\item \emph{Object-oriented} metrics may include coupling, -cohesion and others specifically related to object-oriented -methodology, as suggested by~\citet{979986, Incorporating_transitive}; -\item \emph{Dynamic} metrics are gathered from a running -program and demonstrate the behavior of a software component -during its execution~\citep{MITCHELL20064}. -\end{enumerate*} - -\item \emph{Process metrics} such as number of modules changed for a bug-fix, - work products delivered, and so on may also be used - as an input~\citep{The_IT_Measurement_Compendium, 10.1109/ISSRE.2010.25}. -\end{itemize} - -\textbf{AST} can also be used -as an input, which is a represention of the syntax of a -software code snippet as a tree-like structure. -E.g.,~\citet{6676914} computes the set of AST changes between two source -code files using algorithm, published by~\citet{4339230}. - -\subsection{By the ML Model} - -There are plenty of studies for bug detection problem -in which ML methods are used. ML methods -for solving defect detection problem can be classified into the following groups: - -\begin{enumerate}[label=\arabic*)] -\item \emph{Unsupervised learning} is a learning model which uses unlabeled data. -The goal of unsupervised methods is to uncover unknown patterns in the data. -Unsupervised learning for solving defect detection problem includes such algorithms -as K-Means clustering cleaning approach~\citep{8952192}, KNN~\citep{8777507}. - -\item \emph{Supervised learning} uses labeled data to produces a function, -which maps unseen data to labels. 
Supervised methods for defect detection includes -\citep{10.1145/1137983.1138012}, Naive Bayes~\citep{4027145}, Random forest -\citep{1383136, 7476673}, nearest neighbor -\citep{10.1145/2786805.2786813}, SVM~\citep{ELISH2008649, 10_1007}, -neural network~\citep{THWIN2005147THWIN2005147},~\citep{1033229, 10.1145/3360588} and ensemble techniques~\citep{Ensemble_Techniques}. -Some authors tried to combine different machine learning techniques -(Linear Regression, Neural Network for continuous goal field, Naive Bayes, etc.) -with statistical techniques, such as PCA~\citep{1544801}. - -\item \emph{Semi-supervised learning} uses both labeled and unlabeled data and -is a combination of supervised and unsupervised methods. -It can help unsupervised methods to obtain better-defined clusters, -as explained by~\citet{semi_supervised_learning, 7965301, Zhang2017}. - -\item \emph{Deep learning} is a class of ML algorithms, which extracts -features from lower layers in order to use them in higher ones. -Textual information (for example, English bug reports -or source files comments and names of variables) can be used in deep learning. -Using this information, deep learning -can achieve better performance than many state-of-the-art approaches. -For example,~\citet{8616596} proposed a different method based -on Long Short-Term Memory (LSTM) networks -and~\citet{XIAO201917} suggested to use -convolutional Neural Network together -with word-embedding and feature-detecting techniques. -\end{enumerate} diff --git a/wp/sections/empirical_results.tex b/wp/sections/empirical_results.tex deleted file mode 100644 index 0536dd4b..00000000 --- a/wp/sections/empirical_results.tex +++ /dev/null @@ -1,13 +0,0 @@ -In our first experiment we collected dataset of Java classes and calculated -metrics and patterns for them. We calculated the readability using open-source -tool. Then we estimate the impact features to readability and conclude that -\emph{ncss\_method\_avg}, \emph{halstead\_volume}, and \emph{right\_spaces\_var} are -most important features. The more details you can find in \emph{Report 1}. - -In second experiment we figured out non-zero correlation between the -non-structural patterns and the code complexity metrics like \emph{Cyclomatic Complexity} -and \emph{NPath}. We observe that \emph{var\_in\_the\_middle} shows -highest impact on complexity. We explain it by the fact that the more complex -programs will tend to have more variable and some of them declared in the middle -of the method body. The more details you can find in \emph{Report 2}. - diff --git a/wp/sections/how_aibolit_works.tex b/wp/sections/how_aibolit_works.tex new file mode 100644 index 00000000..940f3607 --- /dev/null +++ b/wp/sections/how_aibolit_works.tex @@ -0,0 +1,235 @@ +\subsection{The Idea} + +The main purpose of Aibolit is to help developers identify patterns in their +code that may cause maintainability issues. From the user perspective, it works +by outputting a list of patterns recommended to remove, given a Java class. The +Aibolit engine is comprised of two parts: an ML regression model and a +recommendation algorithm. The regression model predicts maintainability of any +Java class. The recommendation algorithm uses the regression model to decide +which pattern is better to avoid by considering different various modifications +of the input Java class. + +\begin{figure}[t] +\includegraphics[width=13cm]{how_it_works_diagram_5.jpg} +\centering +\vspace{1 cm} +\caption{How Aibolit works: (1) Inspect the source code for patterns. 
+(2) Count the pattern occurrences and put them in a vector representation. +Compute the maintainability metric value for the vector. +(3) Consider changes to the vector representation by subtracting pattern counts. +Predict the corresponding maintainability metric score. +(4) Rank all the alternative vectors with respect to how much they improve the original +maintainability score. Recommend changes that gave the most improvement.} +\label{fig:aibolit_graphic} +\end{figure} + +Figure~\ref{fig:aibolit_graphic} represents the Aibolit recommendation procedure at a high level. +In the remaining subsections we provide a more detailed +description of each of Aibolit's components. + +\subsection{Patterns \& Quality metrics} +\label{sec:aibolit_patterns_metrics} + +\subsubsection*{Patterns} + +As discussed above (Section~\ref{sec:related}), +software engineering researchers and practitioners often associate +good and bad code design with specific patterns. We follow this tradition and +build a predictive system to reason about observed patterns in code in terms of their effect on quality. +In the current release of Aibolit, the model is built on top of 34 +commonly used manually designed patterns as input features. See Appendix +for the complete list and detailed descriptions. Note that users of Aibolit +can arbitrarily extend the model by implementing and integrating their patterns +of choice. + +\subsubsection*{Metrics} + +The ultimate goal behind Aibolit's approach is to +learn to identify maintainability-affecting patterns in code and recommend them +to the user. However, as discussed above, maintainability quantification is +still an open problem, and most of the metrics proposed so far typically +describe only a narrow aspect of software maintainability. We recognize it as +the major challenge of our approach and plan to research this problem in the +future. + +In the current release of Aibolit, we use Cognitive Complexity \citep{10.1145/3194164.3194186} as +the maintainability metric. We refer to it as the maintainability metric or just +metric in the remainder of the text. + +\subsection{Maintainability prediction model} + +\label{sec:maint_pred_model} + +\subsubsection*{Dataset} + +\label{sec:dataset} + +To train our prediction model, we mined training data +from GitHub open source repositories. We chose repositories written in Java +as the main language. We filtered out all non-Java files and all software +testing files. To make sure our data is +representative of good software engineering standards, we only extracted +repositories with at least 100 stars and at least six collaborators. + +Aibolit is currently designed to do predictions and recommendations at the class +level. For simplicity, we only consider files that contain exactly one +non-abstract Java class. +%We chose this level of granularity because it is intuitive and because we +%wanted to narrow down our scope for the first stage of development. Thus one +%datapoint in our dataset is one Java class. +We filtered out classes with fewer than 50 or more than 300 lines of code. +The resulting filtered dataset consists of 124 repositories and +29,065 classes. Before filtering, we randomly split the dataset into train and test sets by files with +approximately 0.7:0.3 ratio. The filtered train set contains 20,049 classes, +the filtered test set contains 9,016 classes. +%More detailed statistics can be found in Table~\todo.
-- keep this for research paper +The complete list of mined repositories and the train/test split is provided in Aibolit +project folder. +%(\todo link to file with repository URLs). + +\subsubsection*{Feature and target preprocessing} + +Each Java class gets +associated with a vector of numerical features. We use scaled pattern counts as +features. For each pattern from the fixed set +(see Appendix), we count its occurrences in the +class and divide it by the number of non-commented lines in the class (NCSS). +The target value for each datapoint is the maintainability metric value for the +class (Section~\ref{sec:aibolit_patterns_metrics}), also divided by the NCSS. +%(\ref{eq:feat_vector}) and (\ref{eq:targ_val}) summarize feature and target +%value computation (where $f_p^C$ is the feature value for pattern $p$ and code +%$C$, $t^C$ is the target value for code $C$ ). +Table~\ref{tab:features_example} +gives an illustration of how a dataset looks like after the this procedure. + + +%\vspace{0.2in} +% +%\begin{minipage}{.4\linewidth} \begin{equation} \label{eq:feat_vector} f_p^C = +%\frac{\textit{count}(p, C)}{\textit{NCSS}(C)}\end{equation} \end{minipage}% +%\begin{minipage}{.55\linewidth} \begin{equation} \label{eq:targ_val} t^C = +%\frac{\textit{Metric}(C)}{\textit{NCSS}(C)} \end{equation} \end{minipage} +% +%\vspace{0.2in} + + +We apply NCSS scaling because we observed that our maintainability metric +(Cognitive Complexity) is highly correlated with code size. The scaling +stimulates the model to find more implicit dependencies between patterns and +complexity. + +\begin{table}[H] \begin{center} \begin{tabular}{|r|rrrr|r|} \hline \textbf{class +id} & \textbf{P16} & \textbf{P11} & \textbf{P13} & \dots & \textbf{CogC} +(target metric) \\ \hline \hline class 1 & 0.008695 & 0. & 0.026086 & +\dots& 0.417391 \\ class 2 & 0. & 0.05 & 0.116667 & \dots& +0.466667 \\ class 3 & 0.009909 & 0. & 0.009909 & \dots & 0.732673 \\ +\hline \end{tabular} \end{center} \caption{Example of a training dataset with +preprocessed feature values. \textbf{P16}: {\em Return null}, \textbf{P11}: {\em +Multiple Try}, \textbf{P13}: {\em Null checks}).} \label{tab:features_example} +\end{table} + + + +\subsubsection*{Training} +We train a gradient boosting regression model \citep{Friedman2001GreedyFA}. +We use the implementation of CatBoost \citep{Dorogush2018CatBoostGB} with the RMSE loss function. +For hyperparameter selection, we do a 3-fold crossvalidation. + + +\subsection{Recommendation algorithm} +\label{sec:recommendation_algorithm} +Our recommendation algorithm ranks +patterns observed in the user's source class according to their individual +impact on the code's maintainability metric value. It then outputs a pattern +with the \textit{most negative impact} as a recommendation to the user to remove +it from their code. + +For each pattern $p$ we compute the \textbf{impact factor} $I_{\textit{neg}}(p, C)$ +on code $C$, which is intended to capture the \textit{negative +influence} of $p$ on $C$. 
It is the difference between the quality metric value +of the original code $C$ and the version of $C$ where the count of $p$ has been +decreased (Eq.~\ref{eq:impact_factor}): + +\begin{equation} \label{eq:impact_factor} I_{\textit{neg}}(p_i, C) = M(F(C)) - +M(F_{p_{i} - 1}(C)), \end{equation} + +where $M$ is a quality metric, $F(C)$ is the feature vector $\langle f^C_{p_1}, +..., f^C_{p_n} \rangle$, $F_{p_{i} - 1}(C)$ is the feature vector with the count +of $p_i$ decreased by $1$.%: $\langle f^C_{p_1}, ..., f^C_{p_i - 1}, ..., +%f^C_{p_n} \rangle$. With $f^C_{p_i - 1}$ computed as:\footnote{We chose to +%subtract 1, but in principle it is a hyperparameter of our model and we plan to +%experiment with other values.} + +%\begin{equation} \label{eq:feature_count_min_1} f_{p_i - 1}^C = +%\frac{\textit{count}(p_i, C) - 1}{\textit{NCSS}(C)} \end{equation} + +Under the ``lower metric is better'' convention (i.e., a lower value of the quality metric +means better quality), higher values of $I_{\textit{neg}}$ correspond to patterns +that contribute more to the deterioration of the code's quality. We rank patterns +according to their $I_{\textit{neg}}$ and output the patterns with the highest values as +recommendations. + +Note that at the moment of recommendation we do not observe code with a +decreased pattern count, so we cannot compute the maintainability metric +directly. This is why we resort to a predictive maintainability model +(Section~\ref{sec:maint_pred_model}), which helps estimate the maintainability of a +hypothetical code. + + + +\textbf{Algorithm}~\ref{fig:recsys_alg} summarizes how we do recommendations. +For each pattern from the set of patterns used at training, we +precompute feature values and compute the metric value of the source code +(lines~\ref{line:init_F}-\ref{line:compute_m_source}). Then we compute the +impact factor $I_p$ of each pattern $p$ on the source code maintainability +(lines~\ref{line:init_I}-\ref{line:impact}). Under the ``lower is better'' +convention of the maintainability metric, high values of $I_p$ indicate that removal +of $p$ leads to an improvement of the metric score. We collect the $K$ most +negatively impacting patterns and output them as recommendations. Thus, we + pick patterns $p$ for which $I_p$ are the highest +(lines~\ref{line:topK}-\ref{line:return}).
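+To make this concrete, the fragment below is a minimal Python sketch of the
+ranking step, assuming a pretrained regression model with a
+scikit-learn-style \texttt{predict} method and precomputed pattern counts;
+the function and variable names are illustrative and do not correspond to
+Aibolit's actual API.
+
+\begin{lstlisting}[language=Python]
+from typing import Dict, List
+
+def rank_patterns(model, counts: Dict[str, int], ncss: int, k: int = 1) -> List[str]:
+    """Rank patterns by their estimated negative impact on maintainability."""
+    patterns = sorted(counts)                         # fixed feature order
+    features = [counts[p] / ncss for p in patterns]   # scaled pattern counts
+    observed = model.predict([features])[0]           # metric of the original class
+    impacts = {}
+    for i, p in enumerate(patterns):
+        if counts[p] == 0:
+            continue                                  # nothing to remove
+        modified = list(features)
+        modified[i] -= 1.0 / ncss                     # one occurrence fewer
+        impacts[p] = observed - model.predict([modified])[0]
+    # highest impact factor: removing the pattern improves the metric the most
+    return sorted(impacts, key=impacts.get, reverse=True)[:k]
+\end{lstlisting}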
+\begin{algorithm}[t] +\caption{Aibolit recommendation algorithm} +\hspace{\algorithmicindent} +\textbf{Input:} $\mathsf{M}$: pretrained +maintainability model; $C$: class source code; \\ +\hspace{\algorithmicindent} $P$: array of patterns used for training $\mathsf{M}$\\ + +\begin{algorithmic}[1] +\State $F = [ ]$ \label{line:init_F} +\For{\texttt{i = 1, $|P|$}} +\State $F[i] =\frac{\textit{count}(P[i], C)}{\textit{NCSS}(C)}$% (Eq.~\ref{eq:feat_vector}) +\EndFor \State $M_{\textit{observed}} = \mathsf{M}(F)$ \label{line:compute_m_source} +\State $I = [ ]$ \label{line:init_I} +\For{\texttt{i = 1, $|P|$}} + +\State $F^{\prime} = F$ +\State $F^{\prime}[i] = F[i] - \frac{1}{\textit{NCSS}(C)}$ \label{line:f_prime} %(Eq.~\ref{eq:feature_count_min_1}) +\State $I[i] = M_{\textit{observed}} - \mathsf{M}(F^{\prime})$ \label{line:impact} % (Eq.~\ref{eq:impact_factor}) +\EndFor +\State $I_{\textit{worst}} = \texttt{topK}_{i \in [1,...,|P|]} (I[i])$ \label{line:topK} +\State \textbf{return} $\{P[i]~|~i \in I_{\textit{worst}}\}$ \label{line:return} + +\end{algorithmic} +\centering +\label{fig:recsys_alg} +\end{algorithm} + + +% \subsection{How to customize Aibolit} label{sec:customizing_aibolit} + +% By design, Aibolit is easily adjustable and extendable. It gives an end user +% the opportunity to adapt the tool to their own requirements and preferences. +% Aibolit's core mechanism is ML-driven, therefore, as the user adds new +% patterns to the system, there is no need to manually specify how the pattern +% should be used by the tool. The interactions between patterns are discovered +% automatically by the learning algorithm. + +% In order to modify the set of patterns, the user should provide an +% implementation of a pattern extractor for source code file and modify the +% configuration file (\verb|aibolit/config.py|) accordingly. See full +% instructions in \todo (shouldn't we add them in README?) + +% In order to change the quality metric for training the prediction model, ??? +% \todo. diff --git a/wp/sections/introduction.tex b/wp/sections/introduction.tex index c9505f08..d3643efa 100644 --- a/wp/sections/introduction.tex +++ b/wp/sections/introduction.tex @@ -1,39 +1,74 @@ -% What is quality of code and why it's important? -% What is static analysis? +Insufficient software quality may result in increased development costs and +negatively affect customer satisfaction~\citep{The_Economics_of_Software_Quality}. +\textit{Static code analysis} develops techniques to help detect software quality +issues prior to program execution. It has practical applications in various developer tools. There are both open source\footnote{PMD: \url{http://pmd.sourceforge.net/}, Rubocop: \url{https://github.com/rubocop-hq/rubocop}, +PHPCS: \url{https://github.com/squizlabs/PHP_CodeSniffer}, +FindSecBugs: \url{https://find-sec-bugs.github.io/}, ESLint: \url{https://eslint.org/}, Checkstyle: \url{https://checkstyle.sourceforge.io/}.} +(PMD, Rubocop, PHPCS, FindSecBugs, ESLint, Checkstyle, to name a few) and commercial\footnote{IBM Security AppScan: \url{https://www.hcltechsw.com/wps/portal/products/appscan}, +PVS-Studio: \url{https://www.viva64.com/en/pvs-studio/}, +SonarQube: \url{https://www.sonarqube.org/}, +Parasoft: \url{https://www.parasoft.com/}} +(IBM Security AppScan, PVS-Studio, SonarQube, Parasoft) static analyzers +on the market. -% What is ML? +Static code analysis can be applied to improve the \textit{internal} and the +\textit{external} quality of software \citep{Ilyas2016StaticCA}.
External +quality is related to defects encountered by the end user of the software +product. Within internal quality, two important subcategories are +\textit{functional quality} and \textit{maintainability}. Functional quality is +about code correctness and compliance with the functional software +specifications \citep{Farhan}. Code maintainability is about how easy it is to +analyze, modify and adapt given software \citep{Mohammadi2013AnAO}. -% What is the objective of using ML for defect detection? +Functional quality aspects are typically quite amenable to formal definition +and quantification. +% (\todo: examples!!). +Functional quality is also an essential +requirement in any domain of software development. On the other hand, +maintainability is a lot less straightforward to formally specify or quantify. +%\todo: refs. +Also, in certain applications it appears less important than +functional correctness, although in the business domain it is recognized as an +essential property. +% (\todo: ref). +As a result, there are currently many more +research and practical tools addressing functional quality aspects of code than +maintainability \citep{Overview_Static_Code_Analysis_in_Software_Development}. +Another aspect of static analysis tools that may have hindered their application +to maintainability is that they are predominantly rule-based. Since there has +not yet been a consensus on how to formalize maintainability, it is challenging +to devise a set of formal rules to detect it. -% What is the structure of this document? +We designed our new tool Aibolit to help developers identify patterns in their +code that may cause maintainability issues. It is a next-generation static +analysis tool that uses a machine learning (ML) model as an underlying quality +prediction mechanism. From the perspective of ML, our product is a recommender +system. For a given class file, it gives suggestions to the developer to alter +their code. The recommendations come in the form of \textit{code patterns} that +are detected in the code and advised to be removed. + +Our choice to design Aibolit as an ML-based system alleviates some important +shortcomings of rule-based static analyzers. By design, ML algorithms capture +statistical relations in the external world (data). Therefore, they can be a +good way to model imprecisely and subjectively defined properties of code, such +as its maintainability. Moreover, rule-based systems are known not to scale well +to the diversity of empirically observed cases, and they tend to get very hard +to extend and maintain \citep{LenatFeigenbaum1987}. The ML +approach does not require +manual system adaptation as new observations or new features (patterns) come +along. In fact, Aibolit provides an easy way for developers to integrate a code +pattern of their liking into the recommender system and to analyze the pattern's +impact on code quality. -Last years we see that the number of papers, which are trying to apply ML to source -code analysis, is growing. The researchers, encouraged by the success -of ML, deep learning, and Natural Language Processing (NLP), -are trying to adopt similar approaches to the code analysis. -In our new tool we make an attempt to automatically detect defects -in a software source code using ML and existing source code metrics, -such as Cyclomatic Complexity, cohesion, Number of Incoming Invocations, -and others.
-In Section~\ref{sec:related} we analyze existing tools and methods -of static analysis and identify the most important issues currently -present in modern instruments. -In Section~\ref{sec:categories} we classify existing ML-based -static analysis ideas by the type of input they expect, by the output -they produce, and some other qualities. -In Section~\ref{sec:data} we make an attempt to summarize the situation -with data availability on the market and give a few examples of the -most notable datasets freely available for researchers. -In Section~\ref{sec:criteria} we identify the most important criteria -for the selection of the method and the tool for ML-based static analysis. -In Section~\ref{sec:method} we introduce our own method of detecting -defects in the source code using ML and software code metrics. +% This document explains how Aibolit works and what makes it novel and +% different from other static analyzers (Section~\ref{sec:method}). +% We further show how Aibolit can be extended +% with custom patterns and metrics. We discuss the current shortcomings and the way +% they can be addressed in future work (Section~\ref{sec:conclusion}). -In Section~\ref{sec:risks} we identify a number of risks we forecast -for the project and expect to mitigate. \ No newline at end of file diff --git a/wp/sections/motivation.tex b/wp/sections/motivation.tex new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/wp/sections/motivation.tex @@ -0,0 +1 @@ + diff --git a/wp/sections/notes.tex b/wp/sections/notes.tex deleted file mode 100644 index 2734b441..00000000 --- a/wp/sections/notes.tex +++ /dev/null @@ -1,39 +0,0 @@ -Code readability is closely related to defect detection problem. There are plenty of -studies where the problem of code readability is considered -\citep{8651396, xxx66666444, 10.1145/1985441.1985454}. At the moment, it is -still the issue since the problem is too subjective. - -It is interesting that \citet{10.1109/ICPC.2019.00014} tried to evaluate -the models for code readbility problem. The authors extracted 548 commits -from 63 engineered Java software projects. The authors identified a commit as a readability commit -when the message of the commit directly indicated it. E.g., -the authors searched such words of the commit as ``readable'', -``readability'', ``easier to read'', ``understand'', etc. -Several state of the art readability models were reviewed by \citet{10.1109/ICPC.2019.00014} -and it was discovered, that the models failed to capture readability improvements. -The authors also suggested several metrics which were not considered in the reviewed state-of-art papers. -The authors believe that those metrics for code readability can be used to detect readabilty -changes more efficiently. - -It is important to notice that code readabilty also can have a correlation -with complexity of the code. \citet{10.1109/TSE.2009.70} compared code readability -and software complexity. They used tools, published by \citet{xxx66666444, Readabil74:online}, to -compute metrics for code readability like \emph{The Automated Readability Index}, -\emph{The Simple Measure of Gobbledygook}, \emph{Flesch-Kincaid Readability Index} -\emph{The Gunning's Fog Index}, \emph{Coleman-Liau Index and Buse Readability Score}. -Also, they computed complexity software metrics, such as \emph{Halstead Complexity Volume}, -\emph{McCabe's Cyclomatic Complexity}. The authors found out that there was a -negative correlation between the readability and complexity metrics. 
-It means that low readability increases program complexity and vice versa. -The authors also mentioned that the languages constructions as comments, spacing, -while loop, meaningful names and do-while loop affects the code readiblity the most. -The authors also published a dataset with Java code which can help to detect defects. - -\citet{10.1007/978-3-319-95171-3_32} proposed a model which quantitatively measures the readability -of source code. The first idea is the using metrics (\emph{LOC, ProgramVolume, Entropy}) -as the key indicators which affect the source code readability. The second -idea is that the authors introduced the equation for quantitative measure -the source code readability in real time. This measure can demonstrate -how small changes in the code affect the readability of the code. -The authors also tried to optimize the model and they reached about -74.59\% of explanatory power. diff --git a/wp/sections/pattern_emp_analysis.tex b/wp/sections/pattern_emp_analysis.tex new file mode 100644 index 00000000..5438f084 --- /dev/null +++ b/wp/sections/pattern_emp_analysis.tex @@ -0,0 +1,124 @@ +\subsection{Empirical analysis of patterns} + +As a by-product of Aibolit's ML and recommendation +engine, we get a tool for empirically analysing different patterns' impacts on the target quality metric. Just +like in the main recommendation algorithm (Algorithm~\ref{fig:recsys_alg}, Section~\ref{sec:recommendation_algorithm}), we can estimate whether a particular pattern has a positive or negative impact on the quality metric by considering modifications of source code where the pattern count is decreased or increased. We perform such a procedure on a held-out set, which allows us to estimate the average impact of a particular pattern on quality. + +In Table~\ref{tab:pattern_analysis} we present a case study of the 34 patterns used for training Aibolit (see Appendix for the pattern descriptions). On a separate test set (see details in Section~\ref{sec:dataset}), for each pattern, we considered increasing and decreasing the pattern's count by 1. We used Aibolit's pretrained regression model to predict the corresponding change in the quality metric.
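+The fragment below is a minimal Python sketch of how such a tally can be
+computed with a pretrained regression model; the model interface, the feature
+layout and all names are illustrative assumptions, not Aibolit's actual code.
+The column labels match those used in Table~\ref{tab:pattern_analysis}.
+
+\begin{lstlisting}[language=Python]
+from collections import Counter
+
+def pattern_impact_tally(model, feature_vectors, ncss_values, pattern_idx, eps=1e-9):
+    """Count how often perturbing one pattern changes the predicted metric.
+
+    feature_vectors: scaled feature vectors of the test classes,
+    ncss_values: NCSS of each class, pattern_idx: column of the pattern."""
+    tally = Counter()
+    for features, ncss in zip(feature_vectors, ncss_values):
+        base = model.predict([features])[0]
+        for sign, label in ((-1, "p-"), (+1, "p+")):
+            modified = list(features)
+            modified[pattern_idx] += sign / ncss   # one occurrence fewer or more
+            predicted = model.predict([modified])[0]
+            if predicted < base - eps:
+                tally[label + " m-"] += 1          # metric decreased (improved)
+            elif predicted > base + eps:
+                tally[label + " m+"] += 1          # metric increased (deteriorated)
+            else:
+                tally[label + " m="] += 1          # metric unchanged
+    return tally
+\end{lstlisting}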
+ + +\begin{table}[ht] +\footnotesize +\begin{tabular}{lllllll} +patterns & p- m- & p+ m+ & p- m+ & p+ m- & p- m= & p+ m= \\ +\\ \hline +Asserts & 92 & 61 & 186 & 219 & 5 & 3 \\ +Setters & 245 & 160 & 901 & 976 & 21 & 31 \\ +Empty Rethrow & 77 & 65 & 25 & 36 & 0 & 1 \\ +Prohibited class name & 617 & 311 & 218 & 522 & 0 & 2 \\ +Force Type Casting & 2363 & 2313 & 1742 & 1790 & 14 & 16 \\ +Count If Return & 969 & 883 & 214 & 298 & 0 & 2 \\ +Implements Multi & 459 & 320 & 264 & 403 & 0 & 0 \\ +Instance of & 1396 & 1374 & 151 & 173 & 6 & 6 \\ +Many primary constructors & 19 & 19 & 604 & 605 & 2 & 1 \\ +Method chain & 573 & 574 & 2217 & 2214 & 43 & 45 \\ +Multiple try & 371 & 239 & 259 & 391 & 0 & 0 \\ +Non final attribute & 1249 & 1185 & 5835 & 5839 & 127 & 187 \\ +Null check & 5863 & 5978 & 575 & 457 & 9 & 12 \\ +Partial synchronized & 46 & 49 & 155 & 149 & 0 & 3 \\ +Redundant catch & 84 & 46 & 38 & 79 & 4 & 1 \\ +Return null & 1290 & 813 & 926 & 1401 & 4 & 6 \\ +String concat & 1596 & 2089 & 1506 & 1012 & 23 & 24 \\ +Super Method & 212 & 275 & 1012 & 949 & 10 & 10 \\ +This in constructor & 15 & 42 & 72 & 45 & 0 & 0 \\ +Var declaration distance for 5 lines & 1976 & 1464 & 738 & 1244 & 19 & 25 \\ +Var declaration distance for 7 lines & 1190 & 959 & 733 & 949 & 16 & 31 \\ +Var declaration distance for 11 lines & 703 & 603 & 365 & 466 & 10 & 9 \\ +Var in the middle & 3372 & 3117 & 2799 & 3046 & 37 & 45 \\ +Array as function argument & 882 & 768 & 351 & 464 & 1 & 2 \\ +Joined validation & 232 & 226 & 14 & 26 & 8 & 2 \\ +Non final class & 7141 & 3201 & 3723 & 7663 & 0 & 0 \\ +Private static method & 529 & 541 & 667 & 648 & 2 & 9 \\ +Public static method & 1166 & 1212 & 1142 & 1088 & 4 & 12 \\ +Null Assignment & 1245 & 803 & 717 & 1150 & 6 & 15 \\ +Multiple While & 120 & 97 & 16 & 39 & 0 & 0 \\ +Protected Method & 868 & 402 & 1147 & 1613 & 7 & 7 \\ +Send Null & 556 & 325 & 1510 & 1733 & 5 & 13 \\ +Nested Loop & 580 & 527 & 29 & 81 & 1 & 2 \\ +\hline +\end{tabular} +\centering +\caption{Empirical analysis of patterns. For each source code in the test set, we consider encreasing (\textbf{p+}) and decreasing (\textbf{p-}) pattern count by 1. And we recorded whether the maintainability metric increased (\textbf{m+}) or decreased (\textbf{m-}) as a result of that (where lower is better). In some cases the metric did no change (\textbf{m=}). The values in the cells are counts of cases.} +\label{tab:pattern_analysis} +\captionsetup{font=scriptsize} +% \caption*{ +% We use the following notation into named columns: +% ($p-$): decrease pattern by $\frac{1}{ncss}$; +% ($p+$): increase pattern by $\frac{1}{ncss}$; +% ($c-$): complexity has been decreased; $c+$: complexity has been increased; +% ($c=$): complexity has been not changed; +% (\emph{-1(top1)}): decreasing of pattern shows best \emph{CogC} improvement; +% (\emph{+1(top1)}): increasing of pattern shows best \emph{CogC} improvement. 
+% } +% \caption*{ +% We use the following notation into named columns: \\ +% \\ +% \centering +% \begin{tabular}{rl} +% $p-$ & decrease pattern by $\frac{1}{ncss}$ \\ +% $p+$ & increase pattern by $\frac{1}{ncss}$ \\ +% $c-$ & complexity has been decreased \\ +% $c+$ & complexity has been increased \\ +% $c=$ & complexity has been not changed \\ +% \emph{-1(top1)} & decreasing of pattern shows best \emph{CogC} improvement \\ +% \emph{+1(top1)} & increasing of pattern shows best \emph{CogC} improvement \\ +% \end{tabular} +% } +\end{table} + +Based on the statistics in Table~\ref{tab:pattern_analysis}, it appears that \emph{Prohibited class name}, \emph{Count If Return}, \emph{Instance of}, \emph{Null check}, \emph{Nested Loop}, +\emph{Array as function argument}, \emph{Joined validation} are \textbf{anti-patterns}, since decreasing their count tends to improve the metric. Another group of patterns are \emph{Setters}, \emph{Many primary constructors}, \emph{Method chain}, \emph{Non final attribute}, \emph{Super Method}, \emph{Send Null}: for them we observe that decreasing them causes the metric to deteriorate, and increasing causes the metric to improve. We consider them \textbf{pro-patterns}. The third group (the rest of the patterns) can both improve and deteriorate the metric. We refrain from calling them either anti- or pro-patterns. + +Attempting to interpret the results, we observe that ``true'' anti-patterns usually have an \emph{if/else} condition or a loop in their definition. +E.g., \emph{Null check} always checks for a null; \emph{Count If Return}, \emph{Instance of}, +and \emph{Joined validation} always involve an \emph{if} condition; \emph{Nested Loop} always has at least one loop inside. Given that so far we have worked with the Cognitive Complexity metric, it is no surprise that those patterns affect it (\emph{if/else} conditions and loops are the main contributors to \emph{CogC}). Despite this limitation of the present analysis, we believe the proposed \textit{method} itself can be very useful in software engineering practice and research.
+ + + + +% ORIG TABLE: +% patterns & -1(top1) & +1(top1) & p- m- & p+ m+ & p- m+ & p+ m- & p- m= & p+ m= \\ +% \\ \hline +% Asserts & 0 & 100 & 92 & 61 & 186 & 219 & 5 & 3 \\ +% Setters & 1 & 113 & 245 & 160 & 901 & 976 & 21 & 31 \\ +% Empty Rethrow & 1 & 4 & 77 & 65 & 25 & 36 & 0 & 1 \\ +% Prohibited class name & 80 & 24 & 617 & 311 & 218 & 522 & 0 & 2 \\ +% Force Type Casting & 69 & 24 & 2363 & 2313 & 1742 & 1790 & 14 & 16 \\ +% Count If Return & 311 & 26 & 969 & 883 & 214 & 298 & 0 & 2 \\ +% Implements Multi & 24 & 244 & 459 & 320 & 264 & 403 & 0 & 0 \\ +% Instance of & 211 & 6 & 1396 & 1374 & 151 & 173 & 6 & 6 \\ +% Many primary constructors & 0 & 343 & 19 & 19 & 604 & 605 & 2 & 1 \\ +% Method chain & 3 & 203 & 573 & 574 & 2217 & 2214 & 43 & 45 \\ +% Multiple try & 156 & 180 & 371 & 239 & 259 & 391 & 0 & 0 \\ +% Non final attribute & 34 & 485 & 1249 & 1185 & 5835 & 5839 & 127 & 187 \\ +% Null check & 1573 & 14 & 5863 & 5978 & 575 & 457 & 9 & 12 \\ +% Partial synchronized & 1 & 93 & 46 & 49 & 155 & 149 & 0 & 3 \\ +% Redundant catch & 6 & 2 & 84 & 46 & 38 & 79 & 4 & 1 \\ +% Return null & 104 & 40 & 1290 & 813 & 926 & 1401 & 4 & 6 \\ +% String concat & 43 & 126 & 1596 & 2089 & 1506 & 1012 & 23 & 24 \\ +% Super Method & 1 & 174 & 212 & 275 & 1012 & 949 & 10 & 10 \\ +% This in constructor & 2 & 891 & 15 & 42 & 72 & 45 & 0 & 0 \\ +% Var declaration distance for 5 lines & 396 & 14 & 1976 & 1464 & 738 & 1244 & 19 & 25 \\ +% Var declaration distance for 7 lines & 25 & 95 & 1190 & 959 & 733 & 949 & 16 & 31 \\ +% Var declaration distance for 11 lines & 16 & 686 & 703 & 603 & 365 & 466 & 10 & 9 \\ +% Var in the middle & 118 & 50 & 3372 & 3117 & 2799 & 3046 & 37 & 45 \\ +% Array as function argument & 86 & 25 & 882 & 768 & 351 & 464 & 1 & 2 \\ +% Joined validation & 63 & 2 & 232 & 226 & 14 & 26 & 8 & 2 \\ +% Non final class & 1056 & 2370 & 7141 & 3201 & 3723 & 7663 & 0 & 0 \\ +% Private static method & 35 & 133 & 529 & 541 & 667 & 648 & 2 & 9 \\ +% Public static method & 37 & 408 & 1166 & 1212 & 1142 & 1088 & 4 & 12 \\ +% Null Assignment & 103 & 35 & 1245 & 803 & 717 & 1150 & 6 & 15 \\ +% Multiple While & 65 & 7 & 120 & 97 & 16 & 39 & 0 & 0 \\ +% Protected Method & 71 & 151 & 868 & 402 & 1147 & 1613 & 7 & 7 \\ +% Send Null & 13 & 109 & 556 & 325 & 1510 & 1733 & 5 & 13 \\ +% Nested Loop & 412 & 35 & 580 & 527 & 29 & 81 & 1 & 2 \\ \ No newline at end of file diff --git a/wp/sections/related_work.tex b/wp/sections/related_work.tex index d0f22f6d..f8ce4101 100644 --- a/wp/sections/related_work.tex +++ b/wp/sections/related_work.tex @@ -1,89 +1,85 @@ -There are many definitions of a defect. -\citet{5989519} says that defect is ``a fault, bug, inaccuracy or lack of expected -functionality in a project artifact.'' -\citet{Assurance} says that it is ``a problem -(synonym of fault) which, if not corrected, -could cause an application to either fail or to produce incorrect results.'' +\subsection{Quality and quality metrics} +IEEE Standards define software quality as the array of features of +a software product that represent its capability to satisfy +specific needs \citep{Youness2013ComparativeSO}. +Software quality is the extent to which a process, +component, or system fulfills customers' needs or expectations +through product or service features, thus providing customer +satisfaction \citep{Iacob_Constantinescu}. -Defects and software quality are directly related. 
There are multiple studies -of defect types, their impact, complexity, -root causse and other characteristics,~\citet[e.g.][]{10.1145/69605.2085, -10.5555/256664.256773, 10.1145/390016.808455, Glass1981PersistentSE, -10.1145/1353535.1346323, 10.1007/s10664-013-9258-8, catolino2019bugs}. -There are a few common preventive ways to deal with defects, like -static code analyzers, testing software, or peer code review. +Functional and structural qualities are the key aspects of software +quality \citep{Liu2006AnIE}. \citet{Farhan} describe +functional quality as the capability of the software +to properly perform its tasks according to user needs and +intended objectives. Structural quality refers to +the resilient structure of the code itself and is difficult to test +compared to functional quality. The main difficulty is that this +notion is quite subjective. -Static code analyzer -is a tool that helps find defects before a program is executed. -Such an analyzer inspects various program representations, for example -Abstract Syntax Tree (AST), Control Flow Graphs (CFG), or -Program Dependency Graph (PDG), -and search for handcrafted defect patterns. -These tools are popular among developers and are often embedded into -Integrated Development Environments (IDE) such as IntelliJ IDEA or NetBeans. -There are more than 40 static analyzers currently on the market, including -very famouns open source projects, such as -PMD, Rubocop, PHPCS, Sparse, CLion, cpplint, FindSecBugs, ESLint, and Checkstyle. -There are also many commercial tools, like -IBM Security AppScan, PVS-Studio, SonarQube, and Parasoft. -Software companies like Google and Facebook have their own open source -static analyzers: Error Prone~\citep{10.1109/SCAM.2012.28} and -Infer~\citep{10.1007/978-3-319-17524-9_1} respectively. -Static code analyzer may vary in supported languages, -supported defect types, and their integration workflow. +In general, structural code quality is a multi-faceted concept, which covers +different attributes of software engineering, for example, maintainability and +readability \citep{Mohammadi2013AnAO}. To evaluate them, various metrics of +software structure were proposed. For instance, McCabe’s software complexity +metrics \citep{McCabe1976ACM} and cognitive complexity metric +\citep{Cognitive}, which are intended to measure readability aspects of the +code. Also, for object-oriented systems, a popular set of metrics is the CK +suite \citep{Chidamber1994AMS}. Many approaches apply such metrics suite to +distinguish parts of the source code with good or bad quality +\citet{Fil2015ACO}, \citet{Shatnawi2010FindingSM} or to identify code smells +(problematic properties and anti-patterns of code) +\citet{Ouni2011MaintainabilityDD}. However, in general, the software engineering +community has not yet reached a consensus as to what exactly structural quality +or maintainability is \citep{Broy2006DemystifyingM}. +% (\todo: this is quite an old reference...). -Usually static code analyzers are rule-based. -In order to detect a new kind of defect their developers have to -design a new pattern. To address this extensibility problem -there were attempts to learn pattern -from data, as explained by~\citet{bielik2016learning, wang2019learning}. -Recent empirical study by~\citet{10.1145/3238147.3238213} demonstrates that -state-of-the-art code analyzers miss more than 90\% of defects. Most of those missed -defects are inconsistencies with the specification or programmer's intent. 
-In order to catch such defects it is necessary to reason about -possible behavior depending on the input data, -which is difficult or impossible for the rule-based approach. -Such defects are known as \emph{semantic} defects. -It has been demonstrated by~\citet{10.1007/s10664-013-9258-8} -that semantic defects are the dominant root cause for the majoirity of security issues, -when attacker may get unauthorized access to some resources. +\subsection{Software patterns and code smells} -Another issue of most static analyzers is their high -percent of ``false-positives,'' when they find a defect, which -in reality is not a defect. The amount of these false signals grows -when the size and complexity of the project increases. -This leads to developers loosing trust to the tool and stop -using it. +We understand the term \textit{software patterns} in the most general and +abstract way, namely, as any observed code subtructures and software solutions. +Patterns can be of different scale (from variable and method-level to project +level). The term \textit{designed patterns} refers to patterns that are +recommended solutions to commonly occurring programming goals and problems +\citep{gamma1995design}. Despite their popularity, there is much controversy about the +usefulness and universality of such recommended ways of implementation +\citep{mcconnell2004code}. The software engineering community +also identifies patterns that are detrimental to the resulting code. Such +patterns are often called \textit{code smells}. These are parts of the +source code that contain violations of fundamental design principles +and negatively impacts maintainability in terms of the +ability of the product to evolve, quality of end-product, and developer +productivity \citet{Reeshti2019MeasuringCS}. \citet{Din2012AntipatternsDA} +identified 22 types of code smells in object-oriented +design. \citet{Kessentini2019UnderstandingTC} found strong correlation between +several code smells and software bugs. -The efficiency of static analyzers is yet another problem. It was -empirically shown by~\citet{10.1145/3188720} that -a better performance metric for a static analyzer -is the amount of fixed, rather than found defects. -Static analyzers must provide the right information -at the right time doing everything possible to not annoy -software developers. +For the latter reason, a lot of tools and methods have been designed to avoid +code smells. \citet{Kreimer2005AdaptiveDO} proposes a decision tree based +approach to identify code smells, e.g, long method and large class. +\citet{Vaucher2009TrackingDS} apply Bayesian networks to detect God class. +\citet{Palomba2015MiningVH} propose to consider changes of repository history +as an input to the code smell detector for computing the list of code +components affected by the smell. \citet{Liu2019DeepLB} propose a deep learning +based approach to detect code smells. -Usually, static code analyzers use fixed in time code versions as their -input. However, one can also use the history of code changes and -the information from the Issue Tracking System (ITS) -to enhance the quality of prediction, as done by~\citet{Gupta2018IntelligentCR, kapur2018estimating}. +Code smell detection has been integrated into code inspection tools. +\citet{MurphyHill2010AnIA} integrate software metrics visualization with a +source code view. SonarQube\footnote{https://www.sonarqube.org/} controls and +manages the code quality in several ways, such as continuous inspection and +issue detection. 
The platform reports issues such as code smells and bugs, using +lightweight visualizations. +%It also helps developers to collaborate with a +%shared vision of code quality. +Checkstyle\footnote{https://checkstyle.sourceforge.io/} and +PMD\footnote{https://pmd.github.io/} work similarly to SonarQube. -There are studies trying to use ML in order to detect defects. -For example, \citet{Dam2018ADT} first trained vector representations of -an AST in an unsupervised manner and then used it as a feature vector -to train the binary classifier. -\citet{kapur2018estimating} -combined the information extracted from the code programming style and ITS -and built a predictor to estimate the defectiveness of an input source code -file. Using an idea that the names of identifiers (variables, classes, functions) -convey useful information, which might be used to understand programmer's -intent, -\citet{Pradel2018DeepBugsAL} proposed a method that first learned -vector representation of identifiers -and obtained a fixed length vector for a code snippet to train a binary classifier -with feed-forward neural network. -\citet{vasic2019neural} used pointer-network to do joint prediction of -both the location and the possible fix for variable misuse bugs. -\citet{briem2019using} used attention-based neural network to model binary -classifier to detect off-by-one defects. +%iPlasma +%evaluate the quality of %object-oriented programs. It can be used %to calculate +%different metrics and to identify the most %critical points that can be improved +%through code smell %detection. + +All in all, there is no uniform agreement about which patterns are good and +which are bad. We made it our ideology while developing Aibolit: we do not +decide what is good or useful \textit{a priori} but let it be inferred from +data. By customizing the dataset, quality metric and pattern set, the end user +of Aibolit is able to infer which patterns are good for their own end goals. diff --git a/wp/sections/risks.tex b/wp/sections/risks.tex deleted file mode 100644 index fffe210e..00000000 --- a/wp/sections/risks.tex +++ /dev/null @@ -1,26 +0,0 @@ -There is a number of risks we identified and expect to mitigate. - -\textbf{Survey Validity}. -While designing the survey procedure we have to -consider the following threats to its validity: -\begin{enumerate*}[label=\arabic*)] - \item Set of interviewers must be representive. - Will the results be different if we take a different set of interviewers? - \item Set of code snippets must be representive. - Will the results be different if we take a different set of code snippets? - \item How to identify bad interviewers? - Some interviewers may give answers that are not correlated with actual readability. -\end{enumerate*} -To mitigate this risk we have to think about the diversity of interviewers, -for example by age, skills, experience, education, and so on. We have to select code snippets -that vary in length and project domain. The more data we collect the better. -We have to preprocess survey results to exclude outliers. - -\textbf{Feature Limits}. -To predict the readability we are going to use features -calculated with static code analysis tools. -These features mainly reflect the structure, the syntax and -size properties of a particular code snippet. -However, we are not considering semantic properties of the code. -In the future, we can design semantic related features and -add them to our dataset.
\ No newline at end of file diff --git a/wp/sections/saved.tex b/wp/sections/saved.tex deleted file mode 100644 index af50cd7f..00000000 --- a/wp/sections/saved.tex +++ /dev/null @@ -1,63 +0,0 @@ -% The refactoring recommendation -% is a pointer to specific line code with description what to do to improve code readability. -% To do this we studied a relationships between the code readability and mannualy designed AST patterns usually -% encountered in the code. Knowing that the pattern presented in code and has negative impact on readability -% we do our recommendations, pointing to code line where this pattern presented in the code. - -% In our work we created a dataset of Java code snipets features. The features may be devided into -% three groups: code metrics, AST patterns and code readability. The code metrics calculated using existing -% tools: CheckStyle, RefactoringMiner, ChangeDistiller, and SourceMeter. The AST patterns are manually designed -% features that reflect a presense of some syntax structures or other static code preperties, for example -% number of nested FOR loops of length 2 or number ternary operators. To obtain the code readability we conducted -% a survey that described in the next section. - -% \subsection{Dataset} - -% Next, we enriched each pair with metrics calculated -% based on static code, for example \acrfull{sloc}, \acrfull{cyclo}. Eventually we -% have the dataset, where for each snippet of code there are readability score and the set of metrics. - - -% % \begin{itemize} -% % \item Is it possible to predict code readability using static metrics of code? -% % \item Which metrics do have more importance to predict code readability? -% % \item Which ML model predict code readability better? -% % \item Is it possible to recommend particular code refactoring to improve code readability? -% % \end{itemize} - -% \subsection{Methodology} - -% We split our work into two parts. The first one is readability score prediction. -% The second part is code refactoring recommendations to improve code readability. - -% \subsubsection{Readability prediction} - -% Having unseen code snippet we have to estimate its readability score, because we are -% not going to give recomendation if the readability score is acceptable. -% We train a \acrshort{ml} model using gathered dataset. -% We stated the problem as a regression problem of predicting the readability score: - -% $$ -% r_{i} = f(X_{i}, \theta) + e_{i} -% $$ - -% where $i$ is the index of rows in our dataset, $r$ is the readability score, $X$ is -% the set of the code snippet's metrics, $e$ is some error or noise. And our goal is to choose -% the parametric function $f$ and finds its parameters $\theta$. - -% We tested a set of well-known regression models, like Linear Regression, \acrfull{cart}, -% to predict readability. We selected the best model among -% the considered set. - -% \subsubsection{Refactoring recommendations} - -% We measured a features importance with respect to readability score and made an ordered -% list of all features by importance. -% Having unseen code snippet with unacceptable readability score we do our refactoring recomendations. -% We rank our recommendations based on feature importance list. For AST related features we are able -% to point to specific place in the code. - - -% We formulate our business goal following way: having code snippet we want -% get list of refactoring recommendations indicating line and what to do to improve code quality. 
-% List should be ordered by refactoring impact on quality. \ No newline at end of file diff --git a/wp/sections/selected_task.tex b/wp/sections/selected_task.tex deleted file mode 100644 index 141e82fd..00000000 --- a/wp/sections/selected_task.tex +++ /dev/null @@ -1,84 +0,0 @@ -%Here will be paragraph linking previous sections with "Code Readability". - -%The research question is the following: is it possible, using static code analysis to do -%a code refactoring recommendations to improve a code readability? - -We are looking for an answer to the following research question: -Is it possible, -combining static code analysis and ML, -to detect defects in a Java class and -to give specific recommendations for its refactoring? - -To answer this question we are doing the following research: -\begin{enumerate*}[label={\alph*)}] - \item We take a set of Java \emph{classes}; - \item We collect a number of static analysis \emph{metrics} per each class; - \item We locate \emph{code patterns} inside each class; - \item We ask volunteer programmers to review - each class to estimate their \emph{readability}; - \item We put collected data together into a \emph{dataset}; - \item We find \emph{relations} between metrics and patterns in the dataset; - \item We take an unseen Java class and \emph{locate code patterns}, which impact the quality more then others. -\end{enumerate*} - -\textbf{Classes}. -We parse Java projects from GitHub to obtain training Java classes. -All parsed projects must have more than 100 stars, and more than five collaborators. -Collected Java classes must have more than 50 lines of code and less than 300. - -\textbf{Metrics}. -The code metrics are calculated using existing open-source -tools like CheckStyle and SourceMeter. Examples of metrics are Lines of Code (LoC), -Cyclomatic Complexity (CC), and Number of Incoming Invocations (NII). - -\textbf{Code Patterns}. -AST patterns are the features that reflect the presence -of some syntax structures in the code, -for example number of nested \texttt{FOR} loops -or the amount of ternary operators. The AST patterns are manually designed -and always point to a specific place in the code. - -\textbf{Readability}. -To gather readability characteristics we conduct -a survey, where volunteering programmers are asked to estimate -the readability of training Java classes, giving answers on a $[0..9]$ scale, -where 0 means not readable at all and 9 means perfectly readable. -Each programmer is asked to review some snippets from -the entire training dataset, which means that each -snippet is reviewed by a few programmers. The readability ``score'' per -snippet is a mean of all answers collected. - -\textbf{Dataset}. -We combine and put the data into the single dataset. -In the \autoref{tab:table1} you can see the example how dataset looks -(here CC, CBO, LCOM, and NMD are acronyms for software metrics, -while RS is the Readability Score collected from volunteers). - -\begin{table}[H] -\begin{center} -\begin{tabular}{rrrrrr} -\hline -CC & CBO & LCOM & NMD & \dots & RS \\ -\hline -3 & 6 & 34 & 2 & \dots & 4.3 \\ -4 & 5 & 55 & 1 & \dots & 2.4 \\ -3 & 5 & 22 & 0 & \dots & 5.2 \\ -\hline -\end{tabular} -\end{center} -\caption{Example of collected dataset} -\label{tab:table1} -\end{table} - -\textbf{Relations}. -Using ML methods we are going to learn how code metrics, -AST patterns and readability are related. 
-We are planning to use ML techniques to investigate the importance of features -with respect to the readability and find combinations that have stronger impact. - -\textbf{Locate Code Patterns}. -Finally, for an unseen Java class we can calculate all features except the readability. -Using known features and the knowledge about relations between features we are planning -to give refactoring recommendations and provide links to the lines of code -where most problematic patterns are located. - diff --git a/wp/sections/selection_criteria.tex b/wp/sections/selection_criteria.tex deleted file mode 100644 index abfea3af..00000000 --- a/wp/sections/selection_criteria.tex +++ /dev/null @@ -1,83 +0,0 @@ -There is a number of crucial expectations we have for the -method and the tool under design: - -\textbf{Novelty} is crucial and has a few important aspects: -\begin{enumerate*} -\item -Novelty of the \emph{method} is one the most important values of novelty. -E.g.,~\citet{Akiyama1971AnEO} was the first who -tried to solve defect detection problem using -linear regression as it was mentioned by~\citet{7476771}. -Sometimes authors can use some new idea in the -algorithm, e.g,~\citet{XIAO201917} used convolutional neural network -together with word-embedding and feature-detecting techniques. - -\item -The \emph{effectiveness} of the model is another important -value of novelty. If authors published new model which -is more effective, it means that we can solve a problem -much better. All evaluation measures can be described later. - -\item -Collecting new dataset is one of the most important and hard work in Data Science. -Main results can vary depending on method used when gathering a dataset. -It is necessary to do it in a correct way, since there are many issues related -to data problems like \emph{Outliers}, \emph{Class Imbalance}, \emph{Data shift Problem}, -\emph{High Dimensionality of Data}, etc. mentioned by~\citet{10.1007/s10462-017-9563-5}. -Also, \emph{granularity} is another problem in the -defect detection problem. Defect can be found in a module, -function, line of the code, etc. That is why seems -it is not an easy task to compare the models with a -different level of granularity. -E.g,~\citet{6464273} analyzed five papers published by -IEEE and mentioned that there are some data -quality issues in these papers. -\end{enumerate*} - -\textbf{Evaluation} is the essential of ML. It is impossible -to say whether a model is good or bad, not evaluating it. -A lot of model performance evaluation measures were used -for defects detection, but we can -classify them the following way \citep{10.1007/s10462-017-9563-5, Jiang2008}: -\begin{enumerate*}[label=\arabic*)] -\item \emph{Numeric} performance evaluation measures -are mostly used for defect detection models~\citep{Jiang2008}. -They can include accuracy, F1-score, G-means, specificity, -f-measure, and so on. E.g., they are used by~\citet{6349519}. -\item \emph{Graphical} measures are graphs derived from -the confusion matrix~\citep{Jiang2008}. -They can include ROC curve, precision-recall curve, cost curve etc. -E.g., they are used by~\citet{SHATNAWI20081868}. -\end{enumerate*} - -\textbf{Performance} is another important value for defect detection model. -The selection of the best performance evaluation measure -is not a trivial task. -For example,~\citet{Jiang2008} compared different alternatives -and demonstrated that no single performance evaluation measure is able to evaluate the -performance of a defect prediction model fair enough. 
The authors also added -that it is better not only to measure model classification performance -(like accuracy, recall, etc.), but to minimize the misclassification cost. -\citet{ARISHOLM20102} also compared different types of measures -and mentioned that it is hard to draw general conclusions about the -best performance evaluation measure. - -Since the tool will be actively used with Continuous Integration (CI), -it must have a reasonable performance: it must predict in a few seconds. -If calculation takes a lot of time, a developer may stop using it. -\citet{humble2010continuous} also mentioned that speed -is very important for CI. The sooner you release the software, -the sooner you get a return on your investment. -Performance is also important because it is necessary -to know whether bugfixes are useful. That is why -we need to minimize the delay between the releases and -thus, accelerate the feedback, as explained by~\citet{humble2010continuous}. - -State-of-the-art papers use Neural Networks (NN) for defect -detection problem since it can give better results~\citep{XIAO201917,10.1145/3360588}. -Training takes a lot of time and demands TPU or GPU resources if we use NN. -Word embedding and other feature-detecting techniques can only increase training time. -E.g., \citet{10.1145/3360588, 8616596} noticed that -the training time matters for defect detection problem and -demonstrates the training time of their models. - diff --git a/wp/sections/threats_to_validity.tex b/wp/sections/threats_to_validity.tex new file mode 100644 index 00000000..ac2bd907 --- /dev/null +++ b/wp/sections/threats_to_validity.tex @@ -0,0 +1,4 @@ +% consider only frequencies not inter location +% not consider context +% top OSS datasets +% quality metrics \ No newline at end of file diff --git a/wp/sections/usage_scenarios.tex b/wp/sections/usage_scenarios.tex new file mode 100644 index 00000000..f3317d06 --- /dev/null +++ b/wp/sections/usage_scenarios.tex @@ -0,0 +1,68 @@ +\subsection{Aibolit Index} + +In addition to the recommendation functionality, we designed a feature to measure the overall quality of a developer's source code, the \textit{Aibolit Index}. +It is a single number with the following properties: +\begin{itemize} +\item[(i)] the more patterns are suggested to fix, the higher the Aibolit Index; +\item[(ii)] the higher the negative impact factors (see Section \ref{sec:recommendation_algorithm}) +of the detected patterns, the higher the Aibolit Index. +\end{itemize} +Therefore, the lower the Aibolit Index, the better the project's code from the point +of view of Aibolit. + +Let $P(C)$ be the set of all patterns Aibolit recommends to fix for Java class $C$. +% As we know from the section ~\ref{sec:recommendation_algorithm}, each +% recommended anti-pattern $p \in P(C)$ has an associated impact factor $I_{p}$ and a count $count(p, C)$. +We define the Aibolit Index $A(C)$ of Java class $C$ as the sum of the products of +impact factors $I_{p}$ and scaled counts $count(p, C)$ for all of the +patterns occurring in the class (Eq.~\ref{eq:aibolit_index}). We use log-scaling for smoothing purposes, because some patterns are a lot more common than others. + + +\begin{equation} + A(C) = \sum_{p \in P(C)} { I_{p}(C) \cdot \ln{(count(p, C) + 1)} } \label{eq:aibolit_index} +\end{equation} + + +The Aibolit Index of a project is defined as the average Aibolit Index +across all Java classes in the project. In Table~\ref{table:aibolit_index_repos} +you can see the calculated Aibolit Index of some GitHub Java repositories with +more than 4500 stars. A minimal sketch of how the index can be computed is shown below.
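+As an illustration, the following Python fragment sketches Eq.~\ref{eq:aibolit_index},
+assuming that the recommended patterns, their impact factors and their counts
+have already been computed for each class; all names are illustrative only.
+
+\begin{lstlisting}[language=Python]
+import math
+
+def aibolit_index(impacts, counts):
+    """Aibolit Index of one class: impact factors times log-scaled pattern counts."""
+    return sum(impacts[p] * math.log(counts[p] + 1) for p in impacts)
+
+def project_index(class_indexes):
+    """Aibolit Index of a project: the average index over its Java classes."""
+    return sum(class_indexes) / len(class_indexes)
+\end{lstlisting}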
+for getting a first estimate of the quality of a project's code.
+
+\begin{table}[t]
+\footnotesize
+  \centering
+  \begin{tabular}{|l|l|l|l|l|}
+  \hline
+  Repository & Aibolit Index & Total files & Total NCSS & GitHub stars \\
+  \hline
+  ReactiveX\textbackslash RxJava& 6.66& 1493& 25270 & 42972 \\
+  bumptech\textbackslash glide& 5.81& 465& 5078 & 29364 \\
+  JakeWharton\textbackslash butterknife& 6.31& 74& 935 & 25347 \\
+  greenrobot\textbackslash EventBus& 7.48& 51& 651 & 22637 \\
+  skylot\textbackslash jadx& 6.69& 602& 12658 & 22602 \\
+  alibaba\textbackslash fastjson & 9.97& 144& 20175 & 21891 \\
+  alibaba\textbackslash druid & 6.36& 822& 28852 & 21581 \\
+  Netflix\textbackslash Hystrix& 6.74& 292& 2920 & 19888 \\
+  ReactiveX\textbackslash RxAndroid& 5.47& 9& 59 & 19010 \\
+  google\textbackslash gson& 6.87& 160& 2941 & 18084 \\
+  square\textbackslash picasso& 6.21& 26& 687 & 17514 \\
+  libgdx\textbackslash libgdx & 5.02 & 1981 & 46409 & 17105 \\
+  nostra13\textbackslash Android-Universal-Image-Loader& 7.84& 62& 1059 & 16722 \\
+  qiurunze123\textbackslash miaosha& 3.57& 197& 841 & 16224 \\
+  wuyouzhuguli\textbackslash SpringAll& 13.41& 467& 463 & 15221 \\
+  justauth\textbackslash JustAuth& 4.36& 46& 241 & 8916 \\
+  heibaiying\textbackslash BigData-Notes& 11.17& 54& 204 & 7166 \\
+  crossoverJie\textbackslash cim & 6.70& 96& 574 & 5841 \\
+  wildfirechat\textbackslash server& 6.51& 285& 4671 & 5140 \\
+  febsteam\textbackslash FEBS-Shiro & 5.66& 90& 453 & 4777 \\
+  \hline
+  \end{tabular}
+\caption{Aibolit Index of some popular Java repositories. \label{table:aibolit_index_repos}}
+\end{table}
+
+
+
+
+
+
diff --git a/wp/wp.tex b/wp/wp.tex
index aa6fcea9..35a40e80 100644
--- a/wp/wp.tex
+++ b/wp/wp.tex
@@ -1,9 +1,9 @@
 \documentclass[12pt]{article}
-\usepackage{natbib}
+\usepackage[natbib=true,style=authoryear]{biblatex}
+ \addbibresource{references.bib}
+ \setlength\bibitemsep{0pt}
 \usepackage{pgf}
-  \bibliographystyle{plainnat}
-  \setcitestyle{citesep={,},aysep={}}
-\usepackage[colorlinks,citecolor=blue,linkcolor=black,bookmarks=false,hypertexnames=true]{hyperref}
+\usepackage[colorlinks,citecolor=blue,linkcolor=black,bookmarks=false,hypertexnames=true]{hyperref}
 \usepackage{url}
 \usepackage[inline]{enumitem}
 \usepackage{float}
@@ -12,12 +12,54 @@
 \usepackage{graphicx}
 \usepackage{xcolor}
 \usepackage{setspace}
-
+ \setstretch{1.1}
 \usepackage{enumitem}
+\usepackage{caption}
+\usepackage{xcolor}
+\usepackage[top=1.3in, left=1.4in, includefoot]{geometry}
 \setlist{nosep}
-
- \setstretch{1.1}
-\setlength{\bibsep}{0.0pt}
+\usepackage{xcolor}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+\usepackage{multicol}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage{charter}
+\usepackage{listings}
+\usepackage{authblk}
+
+\definecolor{codegreen}{rgb}{0,0.6,0}
+\definecolor{codegray}{rgb}{0.5,0.5,0.5}
+\definecolor{codepurple}{rgb}{0.58,0,0.82}
+\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
+
+\lstdefinestyle{mystyle}{
+    backgroundcolor=\color{backcolour},
+    commentstyle=\color{codegreen},
+    keywordstyle=\color{magenta},
+    numberstyle=\tiny\color{codegray},
+    stringstyle=\color{codepurple},
+    basicstyle=\sffamily\footnotesize,
+    breakatwhitespace=false,
+    breaklines=true,
+    captionpos=b,
+    keepspaces=true,
+    numbers=left,
+    numbersep=5pt,
+    showspaces=false,
+    showstringspaces=false,
+    showtabs=false,
+    tabsize=2
+}
+
+\lstset{style=mystyle}
+
+\newcommand{\pattern}[3]{
+  {\bf #2.
#1} \\ + {\it Description:} #3 \\ +} + +\newcommand{\todo}{\textcolor{red}{\textbf{TODO}}} \tolerance=800 \usepackage{pgfplots} @@ -30,88 +72,63 @@ \renewcommand*{\bibfont}{\footnotesize} \title{ - \includegraphics[height=48pt]{logo}\\ - \textsc{Aibolit}:\\ - Style Checking\\ + \includegraphics[height=100pt]{logo.png}\\ + \vspace{10pt} + \textsc{Aibolit:}\\ + Static Analysis Using Machine Learning} - +\author{Yegor Bugayenko, Anton Cheshkov, Ekaterina Garmash, Andrey Gusev, Yaroslav Kishchenko, Pavel Lukyanov, Evgeny Maslov, Vitaly Protasov} +\affil{Huawei Technologies Co., Ltd. \\ System Programming Lab \\ Russian Research Institute (RRI) \\ Moscow, Russia} \begin{document} \maketitle +\pagebreak + \begin{abstract} -Quality of code is crucial for the stability and robustness -of software systems. Defect detection at early stages of -software development lifecycle is the cheapest and the easiest -way to increase the quality of code. There are well-known -instruments such as static analyzers and linters, which are -actively used by programmers. However, none of them use Machine -Learning (ML) to detect defects more effectively. -We managed to create such an instrument and empirically demonstrate -its effectiveness\footnote{This is statement is yet to be confirmed}. + +Aibolit is a next generation static analyzer powered by machine learning. +Aibolit gives recommendations to developers to avoid specific software patterns +in order to improve quality of the source code. Aibolit can be extended by adding +custom patterns and quality metrics of choice. In this paper, we explain +how Aibolit works and how it differs from other static analyzers. + \end{abstract} \pagebreak \section{Introduction} +\label{sec:intro} \input{sections/introduction} -\section{Related Work} + +\section{Software quality and patterns} \label{sec:related} \input{sections/related_work} -\section{Categories of Methods} -\label{sec:categories} -\input{sections/considered} - -\section{Available data} -\label{sec:data} -\input{sections/available_data} - -\section{Selection Criteria} -\label{sec:criteria} -\input{sections/selection_criteria} - -\section{The Method} -\label{sec:method} -\input{sections/selected_task} - -\section{Risks} -\label{sec:risks} -\input{sections/risks} - -% \section{Implementation Details} -% \label{sec:implementation} -% \input{sections/implementation} - -\section{Empirical Results} -\label{sec:results} -\input{sections/empirical_results} - -\section{Conclusion} -We will write this section later. - -\section{Future Work} -We will write this later. - -\section{Acknowledgements} -The tool was designed in System Programming Lab. Many thanks -to the contribution of (in alphabetic order of the last name): -Yegor Bugayenko, -Anton Cheshkov, -Vadim Chibiriev, -Lu Jianhua, -Evgeny Maslov, -Yu Jiayuan, -Alexey Zorchenkov. - -%\section{Notes} -%\input{sections/notes} - -\bibliography{references} - - - +\section{How the Aibolit recommender works} +\label{sec:how_aibolit_works} +\input{sections/how_aibolit_works} + +\section{Other usage scenarios} +\label{sec:usage_scenarios} +\input{sections/pattern_emp_analysis} +\input{sections/usage_scenarios} + +\section{Conclusion \& Future Work} +\label{sec:conclusion} +\input{sections/conclusion} + +\newpage +\AtNextBibliography{\small} +\setstretch{1.0} +\raggedright +\begin{multicols}{2}\printbibliography\end{multicols} + +\newpage +\section*{Appendix} +\label{sec:appendix} +\input{sections/appendix} \end{document}
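For readers who prefer code to notation, the Aibolit Index equation introduced in `wp/sections/usage_scenarios.tex` (Eq. `eq:aibolit_index`) can be read as the following minimal Python sketch. It assumes the per-class impact factors I_p(C) and pattern counts count(p, C) have already been computed by the recommender; the function names and the pattern codes in the example are illustrative and are not part of the aibolit API.

```python
import math
from typing import Dict, List


def aibolit_index(impact_factors: Dict[str, float],
                  pattern_counts: Dict[str, int]) -> float:
    """A(C) for a single Java class C: the sum over recommended patterns p
    of I_p(C) * ln(count(p, C) + 1)."""
    return sum(impact * math.log(pattern_counts.get(pattern, 0) + 1)
               for pattern, impact in impact_factors.items())


def project_aibolit_index(class_indexes: List[float]) -> float:
    """Project-level index: the average A(C) across all Java classes."""
    return sum(class_indexes) / len(class_indexes) if class_indexes else 0.0


# Illustrative values only: two recommended patterns for one class.
print(aibolit_index({'P12': 0.9, 'P13': 0.4}, {'P12': 3, 'P13': 1}))
```

The `ln(count + 1)` term mirrors the log-scaling in the equation: a pattern that occurs many times still raises the index, but only sub-linearly, so a few very common patterns do not dominate the score.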