In [1]:
# default_exp exp.metrics_java

In [None]:
# export 

import pandas as pd
import os
import shutil
from subprocess import *

In [None]:
# export

import logging

logger = logging.getLogger()
fhandler = logging.FileHandler(filename='mylog.log', mode='a')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.INFO)

## metrics_java

> This module provides a tool for computing metrics (from static analysis) for python source code using Using <a href="https://github.com/mauricioaniche/ck">CK Package</a>

> @Alvaro 26 Jan 2021

Using <a href="https://github.com/mauricioaniche/ck">CK Package</a>

CK is a java package (jar) which is going to be executed from terminal. It requires the code which is going to be analyzed to be located at <i>physical</i> files. For that reason, the dataset is going to be used to produce some <i>.java</i> files.

## Available metrics

#### Note: Further info. can be found at the github repository of the project.

Structural & complexity
- wmc: Weight Method Class or McCabe's complexity
- loc: Lines of code

Complexity-related

- returnQty: Number of return instructions
- loopQty: Number of loops (i.e., for, while, do while, enhanced for).
- comparisonsQty:  Number of comparisons (i.e., == and !=)
- tryCatchQty: Number of try/catch blocks
- parenthesizedExpsQty: The number of expressions inside parenthesis
- nosi: number of invocations to static methods. It can only count the ones that can be resolved by the JDT.
- assignmentsQty
- mathOperationsQty:  The number of math operations (times, divide, remainder, plus, minus, left shit, right shift).
- variablesQty: Number of declared variables
- maxNestedBlocksQty: The highest number of blocks nested together.

Literals

- stringLiteralsQty: Number of string literals
- numbersQty: Number of numeric literals

Number of methods: Count the number of fields, both total (totalMethodsQty) and specific (i.e., static, public, abstract, private, protected, default, final, and synchronized)

- totalMethodsQty:
- staticMethodsQty
- publicMethodsQty
- privateMethodsQty
- protectedMethodsQty
- defaultMethodsQty
- abstractMethodsQty
- finalMethodsQty
- synchronizedMethodsQty

Number of fields: Count the number of fields, both total (totalFieldsQty) and specific (i.e., static, public, private, protected, default, final, and synchronized)

- totalFieldsQty
- staticFieldsQty
- publicFieldsQty
- privateFieldsQty
- protectedFieldsQty
- defaultFieldsQty
- visibleFieldsQty
- finalFieldsQty
- synchronizedFieldsQty

Classes

- anonymousClassesQty: Number of anonymous classes
- innerClassesQty: Number of inner classes
- lambdasQty: Number of lambda expressions

Indepentent
- uniqueWordsQty: Number of unique words: Number of unique words in the source code. At method level, it only uses the method body as input. At class level, it uses the entire body of the class as metrics. The algorithm basically counts the number of words in a method/class, after removing Java keywords.
- modifiers: public/abstract/private/protected/native modifiers of classes/methods

Each record, corresponds to a individual class. When working with method-level snippets, "articial" classes are created for performing the analysis.

In [None]:
#Utils method

# export

def write_dataset_to_files(df_series, destination_path):
    """
    Function to generate .java files.
    
    Params:
    # df_series: Pandas Series (DataFrame column) with the source code records.
    # destination_path: (str) Absolute path to be used as directory for the generated files.
    
    Returns:
    
    Collection of paths for the corresponding java files.
    
    """
    java_template = 'public class <class_name>{\n    <code_snippet>\n}'
    
    if not os.path.exists(destination_path):
        logging.info('Creating directory.')
        os.mkdir(destination_path)
    
    logging.info("Generating physical .java files.")
    
    file_paths = []
    for idx, value in df_series.iteritems():
        class_name = f'ClassRecord{idx}'
        code = java_template.replace('<class_name>', class_name)
        code = code.replace('<code_snippet>', value)
        file_path = f'{destination_path}/{class_name}.java'
        with open(file_path, 'w') as file:
            file.write(code)
            file_paths.append(file_path)
            
    return file_paths

Execute <i>jar</i> file from python and get the output

In [None]:
# export

def jarWrapper(*args):
    process = Popen(['java', '-jar']+list(args), stdout=PIPE, stderr=PIPE)
    ret = []
    while process.poll() is None:
        line = process.stdout.readline()
        if line != '' and line.endswith(b'\n'):
            ret.append(line[:-1])
    stdout, stderr = process.communicate()
    
    ret += stdout.split(b'\n')
    if stderr != '':
        ret += stderr.split(b'\n')
        
    if '' in ret:
        ret.remove('')
    return ret

In [None]:
# Execution example

args = ['ck_metrics_tool/ck-metrics.jar', '/tf/main/nbs/test_data/test_metrics', 'false', '0', 'True'] # Any number of args to be passed to the jar file

result = jarWrapper(*args)
print(f'Result: {result}')

Result: [b'', b'log4j:WARN No appenders could be found for logger (com.github.mauricioaniche.ck.CK).', b'log4j:WARN Please initialize the log4j system properly.', b'']


In [None]:
!pwd

/tf/main/nbs


In [None]:
# export

class JavaAnalyzer():
    """
    Class get metrics f
    """
    def __init__(self, ck_jar_path):
        self.ck_jar_path = ck_jar_path
    
    def compute_metrics(self, df_series, files_destination_path):
        """
        Computes metrics for a pandas series of java source code snippets
        
        Params
        # df_series: Pandas series (df column) containing java source snippets
        # files_destination_path: Path indicating where the physical .java files are going to be created (for metrics computation)
        
        Returns:
        
        Pandas Dataframe containing metrics
        
        """
        file_paths = write_dataset_to_files(df_series, files_destination_path)
        self.__call_ck_package(files_destination_path)
        metrics_df = self.__get_metrics_df()
        self.__remove_csv_files()
        self.__remove_tmp_java_files(file_paths)
        
        return metrics_df
        
    def __call_ck_package(self, files_path):
        """
        Performs call to external .jar package.
        """
        args = [self.ck_jar_path, files_path, 'false', '0', 'True']
        result = jarWrapper(*args)
        logging.info(f'CK package produced this output:\n{result}')
        
    def __get_metrics_df(self):
        """
        Reads report files (csv) generated by the CK package.
        
        Returns:
        
        Pandas Dataframe containing appropriate metrics
        """
        class_metrics_df = pd.read_csv('class.csv')
        # method_metrics_df = pd.read_csv('method.csv')

        # merged_df = pd.merge(left = class_metrics_df, right = method_metrics_df, left_on='file', right_on='file')

        appropriate_columns = ['file','class', 'wmc', 'totalMethodsQty', 'staticMethodsQty', 'publicMethodsQty', 'privateMethodsQty',
                          'protectedMethodsQty', 'defaultMethodsQty', 'abstractMethodsQty', 'finalMethodsQty','synchronizedMethodsQty',
                          'totalFieldsQty', 'staticFieldsQty', 'publicFieldsQty', 'privateFieldsQty', 'protectedFieldsQty',
                          'defaultFieldsQty', 'visibleFieldsQty', 'finalFieldsQty', 'synchronizedFieldsQty',
                          'nosi', 'loc', 'returnQty', 'loopQty', 'comparisonsQty', 'tryCatchQty', 'parenthesizedExpsQty',
                          'stringLiteralsQty', 'numbersQty', 'assignmentsQty', 'mathOperationsQty', 'variablesQty', 'maxNestedBlocksQty',
                          'anonymousClassesQty', 'innerClassesQty', 'lambdasQty', 'uniqueWordsQty', 'modifiers']

        class_metrics_df = class_metrics_df[appropriate_columns]

        return class_metrics_df
    
    def __remove_csv_files(self):
        """
        Removes files generated by CK package.
        """
        if os.path.exists('class.csv'):
            os.remove('class.csv')
        if os.path.exists('method.csv'):
            os.remove('method.csv')
        if os.path.exists('field.csv'):
            os.remove('field.csv')
            
    def __remove_tmp_java_files(self, paths):
        """
        Removes the temporary generated java files.
        """
        for file_path in paths:
            os.remove(file_path)

In [None]:
# General parameters for testing

def get_default_params():
    return {
    'ck_jar_path': 'ck_metrics_tool/ck-metrics.jar',
    'search_net_ds_path': '/tf/main/dvc-ds4se/code/searchnet/clean_java.csv',
    'sampling_size': 100,
    'physical_files_path': '/tf/main/nbs/test_data/test_metrics'
}

## Testing JavaAnalyzer
Explore with some data

In [None]:
params = get_default_params()

In [None]:
java_analyzer = JavaAnalyzer(params['ck_jar_path'])

In [None]:
java_df = pd.read_csv(params['search_net_ds_path'])

In [None]:
samples = java_df.sample(params['sampling_size'])

In [None]:
paths = write_dataset_to_files(samples['code'], params['physical_files_path'])

Generating physical .java files.


In [None]:
java_metrics = java_analyzer.compute_metrics(samples['code'], params['physical_files_path'])

Generating physical .java files.
CK package produced this output:
[b'', b'log4j:WARN No appenders could be found for logger (com.github.mauricioaniche.ck.CK).', b'log4j:WARN Please initialize the log4j system properly.', b'']


In [None]:
#show metrics dataframe

java_metrics.head()

Unnamed: 0,file,class,wmc,totalMethodsQty,staticMethodsQty,publicMethodsQty,privateMethodsQty,protectedMethodsQty,defaultMethodsQty,abstractMethodsQty,...,numbersQty,assignmentsQty,mathOperationsQty,variablesQty,maxNestedBlocksQty,anonymousClassesQty,innerClassesQty,lambdasQty,uniqueWordsQty,modifiers
0,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord197972,2,1,0,1,0,0,0,1,...,0,2,0,1,1,0,0,0,11,1
1,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord361508,4,1,0,1,0,0,0,1,...,0,5,0,4,2,0,0,0,31,1
2,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord17194,3,1,0,1,0,0,0,1,...,0,1,0,1,1,0,0,0,11,1
3,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord184332,3,1,0,1,0,0,0,1,...,0,1,0,1,1,0,0,0,11,1
4,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord290490,10,1,0,0,0,0,1,1,...,8,3,1,2,2,0,0,0,32,1


In [None]:
print(f'Metrics dataframe columns:\n {java_metrics.columns}')

Metrics dataframe columns:
 Index(['file', 'class', 'wmc', 'totalMethodsQty', 'staticMethodsQty',
       'publicMethodsQty', 'privateMethodsQty', 'protectedMethodsQty',
       'defaultMethodsQty', 'abstractMethodsQty', 'finalMethodsQty',
       'synchronizedMethodsQty', 'totalFieldsQty', 'staticFieldsQty',
       'publicFieldsQty', 'privateFieldsQty', 'protectedFieldsQty',
       'defaultFieldsQty', 'visibleFieldsQty', 'finalFieldsQty',
       'synchronizedFieldsQty', 'nosi', 'loc', 'returnQty', 'loopQty',
       'comparisonsQty', 'tryCatchQty', 'parenthesizedExpsQty',
       'stringLiteralsQty', 'numbersQty', 'assignmentsQty',
       'mathOperationsQty', 'variablesQty', 'maxNestedBlocksQty',
       'anonymousClassesQty', 'innerClassesQty', 'lambdasQty',
       'uniqueWordsQty', 'modifiers'],
      dtype='object')


It is important to remark that each snippet in the dataset, is "transformed" into a class (including a <i>physical</i> .java file) to get the metrics

In [None]:
from nbdev.export import notebook2script
notebook2script()