In [None]:
# default_exp metrics_java

In [None]:
# export 

import pandas as pd
import os
import shutil
from subprocess import *

## metrics_java

> This module provides a tool for computing metrics (from static analysis) for Java source code using the <a href="https://github.com/mauricioaniche/ck">CK Package</a>

Using <a href="https://github.com/mauricioaniche/ck">CK Package</a>

CK is a Java package (jar) that is executed from the terminal. It requires the code to be analyzed to be stored in <i>physical</i> files. For that reason, the dataset is used to produce <i>.java</i> files.

Each record corresponds to an individual class.

In [None]:
# Skeleton of a generated .java file; <class_name> and <code_snippet> are placeholder tokens
# that get replaced with a class name and a source snippet.
java_template = 'public class <class_name>{\n    <code_snippet>\n}'
print(java_template)

public class <class_name>{
    <code_snippet>
}


In [None]:
#Utils method

# export

def write_dataset_to_files(df_series, destination_path):
    """
    Function to generate .java files.

    Each record of the series is wrapped in a class named `ClassRecord<index>` and written
    to `<destination_path>/ClassRecord<index>.java`.

    Params:
    # df_series: Pandas Series (DataFrame column) with the source code records (strings).
    # destination_path: (str) Path to be used as directory for the generated files.
    #                   It is created (including missing parent directories) if it does not exist.

    Returns:

    List with the paths of the generated java files, in the order of the series.

    """
    java_template = 'public class <class_name>{\n    <code_snippet>\n}'

    if not os.path.exists(destination_path):
        print('Creating directory.')
        # makedirs (unlike mkdir) also works when parent directories are missing.
        os.makedirs(destination_path, exist_ok=True)

    print('Generating physical .java files.')

    file_paths = []
    # Series.iteritems() was removed in pandas 2.0; items() is the supported equivalent.
    for idx, value in df_series.items():
        class_name = f'ClassRecord{idx}'
        code = java_template.replace('<class_name>', class_name)
        code = code.replace('<code_snippet>', value)
        file_path = os.path.join(destination_path, f'{class_name}.java')
        # Explicit encoding so snippets with non-ASCII characters are written the same
        # way regardless of the platform's default encoding.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(code)
        file_paths.append(file_path)

    return file_paths

Execute <i>jar</i> file from python and get the output

In [None]:
# export

def jarWrapper(*args):
    """
    Runs `java -jar <args...>` and collects what the process printed.

    Params:
    # args: Path to the jar file followed by any number of arguments to pass to it.

    Returns:

    List of bytes objects, one per non-empty output line: stdout lines first, then stderr lines.
    """
    process = Popen(['java', '-jar'] + list(args), stdout=PIPE, stderr=PIPE)
    # communicate() drains stdout and stderr together and waits for the process to exit.
    # Reading stdout line by line while stderr is an unread pipe can block forever once
    # the child fills the stderr pipe buffer.
    stdout, stderr = process.communicate()

    # The pipes yield bytes, so blank lines have to be detected as b'' (a comparison
    # against the str '' never matches and leaves empty entries in the result).
    return [line for line in stdout.splitlines() + stderr.splitlines() if line]

In [None]:
# Execution example
# The first element is the jar to run; the remaining elements are forwarded to it unchanged.
# NOTE(review): the meaning of 'false', '0', 'True' is defined by the CK command line — confirm against the CK README.

args = ['ck_metrics_tool/ck-metrics.jar', '/tf/main/nbs/test_data/test_metrics', 'false', '0', 'True'] # Any number of args to be passed to the jar file

result = jarWrapper(*args)
print(f'Result: {result}')

Result: [b'', b'log4j:WARN No appenders could be found for logger (com.github.mauricioaniche.ck.CK).', b'log4j:WARN Please initialize the log4j system properly.', b'']


In [None]:
# Show the working directory: the relative jar path and the CK report files (e.g. class.csv) are resolved against it.
!pwd

/tf/main/nbs


In [None]:
# export

class JavaAnalyzer():
    """
    Class to get class-level metrics for java source code snippets using the CK package (jar).

    The snippets are written to temporary .java files, the CK jar is executed on them and the
    generated report (class.csv, read from the current working directory) is loaded into a
    pandas DataFrame. Temporary .java files and CK report files are removed afterwards.
    """

    # Columns of the CK class report that are kept in the returned DataFrame.
    _METRIC_COLUMNS = ['file','class', 'wmc', 'totalMethodsQty', 'staticMethodsQty', 'publicMethodsQty', 'privateMethodsQty',
                       'protectedMethodsQty', 'defaultMethodsQty', 'abstractMethodsQty', 'finalMethodsQty','synchronizedMethodsQty',
                       'totalFieldsQty', 'staticFieldsQty', 'publicFieldsQty', 'privateFieldsQty', 'protectedFieldsQty',
                       'defaultFieldsQty', 'visibleFieldsQty', 'finalFieldsQty', 'synchronizedFieldsQty',
                       'nosi', 'loc', 'returnQty', 'loopQty', 'comparisonsQty', 'tryCatchQty', 'parenthesizedExpsQty',
                       'stringLiteralsQty', 'numbersQty', 'assignmentsQty', 'mathOperationsQty', 'variablesQty', 'maxNestedBlocksQty',
                       'anonymousClassesQty', 'innerClassesQty', 'lambdasQty', 'uniqueWordsQty', 'modifiers']

    # Report files that are deleted after each run.
    _REPORT_FILES = ('class.csv', 'method.csv', 'field.csv')

    def __init__(self, ck_jar_path):
        """
        Params
        # ck_jar_path: Path to the CK jar file to execute.
        """
        self.ck_jar_path = ck_jar_path

    def compute_metrics(self, df_series, files_destination_path):
        """
        Computes metrics for a pandas series of java source code snippets

        Params
        # df_series: Pandas series (df column) containing java source snippets
        # files_destination_path: Path indicating where the physical .java files are going to be created (for metrics computation)

        Returns:

        Pandas Dataframe containing metrics

        Raises:

        Whatever the CK call or the report reading raises (e.g. FileNotFoundError when
        class.csv was not produced); temporary files are removed in that case as well.

        """
        file_paths = write_dataset_to_files(df_series, files_destination_path)
        try:
            self.__call_ck_package(files_destination_path)
            metrics_df = self.__get_metrics_df()
        finally:
            # Clean up even when the jar call or the csv read fails, so a failed run
            # does not leave stale .java/.csv files behind for the next one.
            self.__remove_csv_files()
            self.__remove_tmp_java_files(file_paths)

        return metrics_df

    def __call_ck_package(self, files_path):
        """
        Performs call to external .jar package and prints the output it produced.
        """
        args = [self.ck_jar_path, files_path, 'false', '0', 'True']
        result = jarWrapper(*args)
        print(f'CK package produced this output:\n{result}')

    def __get_metrics_df(self):
        """
        Reads the class report file (class.csv) generated by the CK package.

        Only the class-level report is used; method.csv is not merged into the result.

        Returns:

        Pandas Dataframe containing appropriate metrics
        """
        class_metrics_df = pd.read_csv('class.csv')
        return class_metrics_df[self._METRIC_COLUMNS]

    def __remove_csv_files(self):
        """
        Removes files generated by CK package (those that exist).
        """
        for report_file in self._REPORT_FILES:
            if os.path.exists(report_file):
                os.remove(report_file)

    def __remove_tmp_java_files(self, paths):
        """
        Removes the temporary generated java files.

        Files that are already gone are skipped so that cleanup, which runs inside a
        finally block, cannot hide the error that triggered it.
        """
        for file_path in paths:
            if os.path.exists(file_path):
                os.remove(file_path)

Explore with some data

In [None]:
# Analyzer bound to the CK jar (path is relative to the working directory).
java_analyzer = JavaAnalyzer('ck_metrics_tool/ck-metrics.jar')

In [None]:
# Load the java dataset; the cells below read its 'code' column.
java_df = pd.read_csv('/tf/main/nbs/test_data/clean_java.csv')

In [None]:
# Fixed seed so the same records are drawn on every run; the size is capped so that
# datasets with fewer than 100 rows do not raise.
samples = java_df.sample(min(100, len(java_df)), random_state=42)

In [None]:
# Write each sampled snippet to its own .java file and keep the generated paths.
paths = write_dataset_to_files(samples['code'], '/tf/main/nbs/test_data/test_metrics')

Generating physical .java files.


In [None]:
# Full pipeline: writes the .java files, runs CK on them, loads the metrics and removes the temporary files.
java_metrics = java_analyzer.compute_metrics(samples['code'], '/tf/main/nbs/test_data/test_metrics')

Generating physical .java files.
CK package produced this output:
[b'', b'log4j:WARN No appenders could be found for logger (com.github.mauricioaniche.ck.CK).', b'log4j:WARN Please initialize the log4j system properly.', b'']


## Description of available metrics

In [None]:
# Names of the metrics available in the resulting dataframe.
java_metrics.columns

Index(['file', 'class', 'wmc', 'totalMethodsQty', 'staticMethodsQty',
       'publicMethodsQty', 'privateMethodsQty', 'protectedMethodsQty',
       'defaultMethodsQty', 'abstractMethodsQty', 'finalMethodsQty',
       'synchronizedMethodsQty', 'totalFieldsQty', 'staticFieldsQty',
       'publicFieldsQty', 'privateFieldsQty', 'protectedFieldsQty',
       'defaultFieldsQty', 'visibleFieldsQty', 'finalFieldsQty',
       'synchronizedFieldsQty', 'nosi', 'loc', 'returnQty', 'loopQty',
       'comparisonsQty', 'tryCatchQty', 'parenthesizedExpsQty',
       'stringLiteralsQty', 'numbersQty', 'assignmentsQty',
       'mathOperationsQty', 'variablesQty', 'maxNestedBlocksQty',
       'anonymousClassesQty', 'innerClassesQty', 'lambdasQty',
       'uniqueWordsQty', 'modifiers'],
      dtype='object')

It is important to remark that each snippet in the dataset is "transformed" into a class (including a <i>physical</i> .java file) to get the metrics.

<ul>
    <li>wmc: Weight Method Class or McCabe's complexity</li>
    <li>totalMethodsQty: </li>
    <li>staticMethodsQty </li>
    <li>publicMethodsQty </li>
    <li>privateMethodsQty </li>
    <li>protectedMethodsQty </li>
    <li>defaultMethodsQty </li>
    <li>abstractMethodsQty </li>
    <li>finalMethodsQty </li>
    <li>synchronizedMethodsQty </li>
    <li>totalFieldsQty </li>
    <li>staticFieldsQty </li>
    <li>publicFieldsQty </li>
    <li>privateFieldsQty </li>
    <li>protectedFieldsQty</li>
    <li>defaultFieldsQty </li>
    <li>visibleFieldsQty </li>
    <li>finalFieldsQty </li>
    <li>synchronizedFieldsQty </li>
    <li>nosi: Number of static invocations (invocations to static methods)</li>
    <li>loc: Lines of code</li>
    <li>returnQty</li>
    <li>loopQty </li>
    <li>comparisonsQty</li>
    <li>tryCatchQty</li>
    <li>parenthesizedExpsQty</li>
    <li>stringLiteralsQty</li>
    <li>numbersQty: Number literals</li>
    <li>assignmentsQty</li>
    <li>mathOperationsQty</li>
    <li>variablesQty</li>
    <li>maxNestedBlocksQty: The highest number of blocks nested together.</li>
    <li>anonymousClassesQty</li>
    <li>innerClassesQty</li>
    <li>lambdasQty</li>
    <li>uniqueWordsQty</li>
    <li>modifiers</li>
</ul>

In [None]:
# Run nbdev's notebook-to-script conversion (the output below lists every notebook it converted).
from nbdev.export import notebook2script
notebook2script()

Converted 0.0_mgmnt.prep.i.ipynb.
Converted 0.1_mgmnt.prep.conv.ipynb.
Converted 0.3_mgmnt.prep.bpe.ipynb.
Converted 0.6_mgmnt.prep.nltk.ipynb.
Converted 0.7_metrics_module_python.ipynb.
Converted 0.8_metrics_module_java.ipynb.
Converted 0.9_metrics_example.ipynb.
Converted 1.0_exp.i.ipynb.
Converted 1.1_exp.info-[inspect].ipynb.
Converted 1.1_exp.info.ipynb.
Converted 1.2_exp.csnc.ipynb.
Converted 1.2_exp.gen.code.ipynb.
Converted 1.3_exp.csnc_python.ipynb.
Converted 2.0_repr.codebert.ipynb.
Converted 2.0_repr.i.ipynb.
Converted 2.1_repr.codeberta.ipynb.
Converted 2.1_repr.roberta.train.ipynb.
Converted 2.2_repr.roberta.eval.ipynb.
Converted 2.3_repr.word2vec.train.ipynb.
Converted 2.6_repr.word2vec.eval.ipynb.
Converted 2.7_repr.distmetrics.ipynb.
Converted 2.8_repr.sentence_transformers.ipynb.
Converted 3.1_mining.unsupervised.traceability.eda.ipynb.
Converted 3.2_mining.unsupervised.eda.traceability.d2v.ipynb.
This cell doesn't have an export destination and was ignored:
e
This cel