In [None]:
# default_exp codexplainer.metrics_example

In [None]:
import ds4se as ds
from ds4se.metrics_python import *
from ds4se.metrics_java import *

In [None]:
import pandas as pd

Compute metrics for Python and Java source code using 3rd party libraries

For demonstration purposes, sample datasets are loaded below.

## Python

In [None]:
# Location of the cleaned Python dataset. Keeping it in one named constant lets
# a reader point the example at another copy of the data without editing the call.
# TODO: move dataset paths into shared, notebook-level parameters.
PYTHON_DATA_PATH = '/tf/data/clean_python.csv'

# Load the dataset; the 'code' column is the one analysed further down.
python_df = pd.read_csv(PYTHON_DATA_PATH)

In [None]:
# Draw a fixed-seed sample so that Restart & Run All analyses the same records
# every time. The size is capped at the number of available rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# (without replacement).
python_samples = python_df.sample(min(100, len(python_df)), random_state=42)

In [None]:
# Analyzer that computes metrics for Python source code records
# (provided by ds4se.metrics_python).
py_analyzer = PythonAnalyzer()

In [None]:
# Print the analyzer's built-in documentation to see the available methods.
help(py_analyzer)

Help on PythonAnalyzer in module ds4se.metrics_python object:

class PythonAnalyzer(builtins.object)
 |  Class aimed to obtain metrics from a dataset of python source code records.
 |  Metrics computation is performed via an open source library.
 |  
 |  Methods defined here:
 |  
 |  compute_and_save_metrics_for_df(self, df_series, destination_path)
 |      Computes metrics (static analysis) for a pandas df column (series).
 |      Additionaly, exports metrics results to a csv file located at the specified path
 |      
 |      Params:
 |      # df_series: Pandas DF column containing source code records
 |      # destination_path: string indicating full path (including filename) for the exported file
 |      
 |      Returns:
 |      Tuple comprising
 |      
 |      - Pandas DataFrame with computed metrics
 |      - List of records' indices for which metrics could not be computed
 |  
 |  compute_metrics_for_df_series(self, df_series)
 |      Computes metrics (static analysis) for a 

The method returns a tuple with 2 elements:
<ul>
    <li>metrics_df: 
        <p>pandas dataframe with the computed metrics.</p>
    </li>
    <li>error_indices:
        <p>list of integers with the indices of the records for which metrics could not be computed.</p>
    </li>
</ul>

In [None]:
# Compute metrics for the sampled 'code' column. The call returns a tuple:
# the DataFrame of computed metrics and the list of record indices for which
# metrics could not be computed.
metrics_df, error_indices = py_analyzer.compute_metrics_for_df_series(python_samples['code'])

There was a problem computing metrics for 2 records.


In [None]:
# Preview the first rows of the computed Python metrics.
metrics_df.head()

Unnamed: 0,sample,loc,lloc,sloc,comments,multi,blank,single_comments,h1,h2,...,length,calculated_length,volume,difficulty,effort,time,bugs,complexity,maint_idx,maint_idx_rank
0,244417.0,9.0,6.0,5.0,0.0,3.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,100.0,A
1,116407.0,12.0,8.0,11.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,100.0,A
2,170388.0,10.0,3.0,2.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,100.0,A
3,431088.0,6.0,6.0,5.0,0.0,0.0,0.0,1.0,2.0,4.0,...,6.0,10.0,15.509775,1.0,15.509775,0.861654,0.00517,2.0,74.419795,A
4,44362.0,18.0,14.0,14.0,0.0,0.0,3.0,1.0,1.0,5.0,...,12.0,11.60964,31.01955,0.8,24.81564,1.378647,0.01034,6.0,63.746935,A


In [None]:
# Indices (labels in python_samples) of the records that failed metric computation.
error_indices

[20306, 179905]

With the indices, further exploration can be performed on problematic records

In [None]:
# Select the failing records by index label, keeping only the 'code' column
# (the list ['code'] makes the result a DataFrame rather than a Series).
error_records = python_samples.loc[error_indices, ['code']]

In [None]:
# Display the source code of the problematic records for manual inspection.
error_records

Unnamed: 0,code
20306,"def _group_flush(self,group,name):\n """"..."
179905,"def execute(self, sql, params=None):\n ..."


## Java

The CK jar (the file required by the 3rd-party library) is located at `ck_jar_path`.

In [None]:
# Path to the CK metrics jar, passed to JavaAnalyzer below.
# NOTE(review): this is a relative path, so it presumably resolves against the
# notebook's working directory — confirm before running from another location.
ck_jar_path = 'ck_metrics_tool/ck-metrics.jar'

In [None]:
# Location of the cleaned Java dataset. Keeping it in one named constant lets
# a reader point the example at another copy of the data without editing the call.
JAVA_DATA_PATH = '/tf/main/nbs/test_data/clean_java.csv'

# Load the dataset; the 'code' column is the one analysed further down.
java_df = pd.read_csv(JAVA_DATA_PATH)

In [None]:
# Draw a fixed-seed sample so that Restart & Run All analyses the same records
# every time. The size is capped at the number of available rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# (without replacement).
java_samples = java_df.sample(min(100, len(java_df)), random_state=42)

In [None]:
# Analyzer for Java source code; it is given the location of the CK jar.
java_analyzer = JavaAnalyzer(ck_jar_path)

In [None]:
# Print the analyzer's built-in documentation to see the available methods.
help(java_analyzer)

Help on JavaAnalyzer in module ds4se.metrics_java object:

class JavaAnalyzer(builtins.object)
 |  Class get metrics f
 |  
 |  Methods defined here:
 |  
 |  __init__(self, ck_jar_path)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  compute_metrics(self, df_series, files_destination_path)
 |      Computes metrics for a pandas series of java source code snippets
 |      
 |      Params
 |      # df_series: Pandas series (df column) containing java source snippets
 |      # files_destination_path: Path indicating where the physical .java files are going to be created (for metrics computation)
 |      
 |      Returns:
 |      
 |      Pandas Dataframe containing metrics
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)



In [None]:
# Compute metrics for the sampled Java snippets. The second argument is the
# directory where the physical .java files are created for metrics computation.
# NOTE(review): this rebinds `metrics_df`, so the Python metrics computed
# earlier are no longer reachable under that name once this cell has run.
metrics_df = java_analyzer.compute_metrics(java_samples['code'], '/tf/main/nbs/test_data/test_metrics')

Generating physical .java files.
CK package produced this output:
[b'', b'log4j:WARN No appenders could be found for logger (com.github.mauricioaniche.ck.CK).', b'log4j:WARN Please initialize the log4j system properly.', b'']


In [None]:
# Preview the first rows of the computed Java metrics.
metrics_df.head()

Unnamed: 0,file,class,wmc,totalMethodsQty,staticMethodsQty,publicMethodsQty,privateMethodsQty,protectedMethodsQty,defaultMethodsQty,abstractMethodsQty,...,numbersQty,assignmentsQty,mathOperationsQty,variablesQty,maxNestedBlocksQty,anonymousClassesQty,innerClassesQty,lambdasQty,uniqueWordsQty,modifiers
0,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord142975,2,1,0,1,0,0,0,1,...,1,1,0,1,1,0,0,0,20,1
1,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord297973,1,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,14,1
2,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord256532,1,1,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,13,1
3,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord138649,4,1,1,1,0,0,0,1,...,1,2,1,2,2,0,0,0,15,1
4,/tf/main/nbs/test_data/test_metrics/ClassRecor...,ClassRecord441457,2,1,0,1,0,0,0,1,...,0,0,0,1,0,0,0,1,9,1
