In [5]:
import os
import re
import shlex
import subprocess
import time
from multiprocessing import Pool

import numpy as np
import pandas as pd
from tqdm import tqdm

In [7]:
# Releases to evaluate. Each name is used as a sub-directory of
# `base_file_dir` and as the prefix of the result file name.
all_eval_releases = ['activemq-5.2.0','activemq-5.3.0','activemq-5.8.0',
                     'camel-2.10.0','camel-2.11.0', 
                     'derby-10.5.1.1',
                     'groovy-1_6_BETA_2', 
                     'hbase-0.95.2',
                     'hive-0.12.0', 
                     'jruby-1.5.0','jruby-1.7.0.preview1',
                     'lucene-3.0.0','lucene-3.1', 
                     'wicket-1.5.3']

# Project names (not referenced by the cells visible in this notebook).
all_dataset_name = ['activemq','camel','derby','groovy','hbase','hive','jruby','lucene','wicket']

# Root directory holding one sub-directory of source files per release.
base_file_dir = './ErrorProne_data/'

# javac invocation with the Error Prone plugin: every check is disabled except
# CollectionIncompatibleType, which is reported at ERROR severity. The jar
# paths are relative, so they are resolved against the working directory.
# The trailing space is intentional: the source file path is appended directly.
base_command = "javac -J-Xbootclasspath/p:javac-9+181-r4173-1.jar -XDcompilePolicy=simple -processorpath error_prone_core-2.4.0-with-dependencies.jar:dataflow-shaded-3.1.2.jar:jFormatString-3.0.0.jar '-Xplugin:ErrorProne -XepDisableAllChecks -Xep:CollectionIncompatibleType:ERROR' "

# Directory that receives one line-level result file per release.
result_dir = './ErrorProne_result/'

# exist_ok=True replaces the check-then-create pair: it cannot race with
# another process creating the directory and keeps the cell safe to re-run.
os.makedirs(result_dir, exist_ok=True)
    

In [10]:
def run_ErrorProne(rel):
    """Run the Error Prone command on every file of one release.

    Each entry of ``base_file_dir + rel + '/'`` is compiled on its own with
    ``base_command``. Line numbers that appear in the compiler output as
    ``<line>: error:`` are marked as flagged. One row per source line is
    written to ``result_dir + rel + '-line-lvl-result.txt'`` in CSV format
    with the columns ``filename``, ``test-release``, ``line_number`` and
    ``EP_prediction_result``.

    Parameters
    ----------
    rel : str
        Release name; must be the name of a sub-directory of ``base_file_dir``.

    Returns
    -------
    None
        Results are written to disk and a completion message is printed.
    """
    columns = ['filename', 'test-release', 'line_number', 'EP_prediction_result']
    java_file_dir = base_file_dir + rel + '/'
    df_list = []

    for java_filename in tqdm(os.listdir(java_file_dir)):
        java_path = java_file_dir + java_filename

        # Only the number of lines is needed; the context manager guarantees
        # the handle is closed instead of leaking one descriptor per file.
        with open(java_path, 'r', encoding='utf-8', errors='ignore') as f:
            code_len = len(f.readlines())

        # The command is executed through a shell, so the path is quoted to
        # survive spaces or shell metacharacters in file names.
        output = subprocess.getoutput(base_command + shlex.quote(java_path))

        # Raw string avoids the invalid '\d' escape; the capture group yields
        # the line number directly. A set removes duplicate reports.
        # NOTE(review): this matches every javac "error:" diagnostic, so plain
        # compilation errors are presumably counted too - confirm intended.
        reported_lines = {int(n) for n in re.findall(r'(\d+): error:', output)}

        line_df = pd.DataFrame({
            # NOTE(review): assumes '_' in the stored file name encodes a path
            # separator; a name with a genuine underscore would be altered.
            'filename': [java_filename.replace('_', '/')] * code_len,
            'test-release': [rel] * code_len,
            'line_number': np.arange(1, code_len + 1),
        })
        line_df['EP_prediction_result'] = line_df['line_number'].isin(reported_lines)

        df_list.append(line_df)

    # pd.concat raises ValueError on an empty list; an empty release directory
    # now yields a header-only result file instead of aborting the run.
    if df_list:
        final_df = pd.concat(df_list)
    else:
        final_df = pd.DataFrame(columns=columns)

    final_df.to_csv(result_dir + rel + '-line-lvl-result.txt', index=False)
    print('finished', rel)

In [11]:
# Number of worker processes; each worker handles whole releases.
agents = 5
# One release per task. Pool.map splits the input into chunks of this size,
# so the previous value of 8 turned the 14 releases into only two chunks and
# left most of the workers idle.
chunksize = 1

with Pool(processes=agents) as pool:
    # run_ErrorProne returns None; its results are the files it writes.
    pool.map(run_ErrorProne, all_eval_releases, chunksize)

100%|██████████| 25/25 [00:59<00:00,  2.36s/it]]
 35%|███▍      | 27/78 [00:59<01:52,  2.21s/it]

finished jruby-1.5.0


100%|██████████| 38/38 [01:29<00:00,  2.34s/it]]
 86%|████████▌ | 67/78 [02:28<00:24,  2.24s/it]

finished jruby-1.7.0.preview1


100%|██████████| 78/78 [02:52<00:00,  2.21s/it]]
 58%|█████▊    | 76/130 [02:52<02:17,  2.54s/it]

finished activemq-5.1.0


100%|██████████| 41/41 [01:30<00:00,  2.21s/it]t]


finished activemq-5.2.0


100%|██████████| 62/62 [02:14<00:00,  2.17s/it]t]


finished lucene-2.9.0


100%|██████████| 130/130 [04:58<00:00,  2.29s/it]


finished derby-10.5.1.1


100%|██████████| 30/30 [01:11<00:00,  2.37s/it]]
 46%|████▌     | 47/103 [01:47<02:09,  2.32s/it]

finished groovy-1_6_BETA_1


100%|██████████| 55/55 [02:00<00:00,  2.19s/it]]
 45%|████▌     | 14/31 [00:33<00:38,  2.28s/it]

finished lucene-3.0.0


100%|██████████| 31/31 [01:12<00:00,  2.33s/it]]


finished groovy-1_6_BETA_2


100%|██████████| 38/38 [01:23<00:00,  2.19s/it]]


finished lucene-3.1


100%|██████████| 103/103 [03:52<00:00,  2.25s/it]
 19%|█▉        | 22/115 [00:52<03:25,  2.21s/it]

finished activemq-5.3.0


100%|██████████| 71/71 [02:34<00:00,  2.17s/it]]
 73%|███████▎  | 84/115 [03:18<01:21,  2.62s/it]

finished wicket-1.3.0-beta2


100%|██████████| 115/115 [04:27<00:00,  2.33s/it]


finished hbase-0.95.0


100%|██████████| 102/102 [03:49<00:00,  2.25s/it]


finished activemq-5.8.0


100%|██████████| 50/50 [01:51<00:00,  2.22s/it]]


finished wicket-1.5.3


100%|██████████| 89/89 [02:26<00:00,  1.65s/it]]


finished camel-2.9.0


100%|██████████| 110/110 [03:12<00:00,  1.75s/it]


finished hbase-0.95.2


100%|██████████| 73/73 [01:59<00:00,  1.64s/it]]


finished hive-0.10.0


100%|██████████| 111/111 [02:50<00:00,  1.54s/it]


finished camel-2.10.0


100%|██████████| 119/119 [02:56<00:00,  1.49s/it]


finished camel-2.11.0


100%|██████████| 171/171 [04:37<00:00,  1.62s/it]
 19%|█▊        | 53/285 [01:23<05:52,  1.52s/it]

finished hive-0.12.0


100%|██████████| 127/127 [03:22<00:00,  1.59s/it]
 65%|██████▌   | 186/285 [04:46<02:27,  1.49s/it]

finished jruby-1.4.0


100%|██████████| 285/285 [06:29<00:00,  1.37s/it]


finished derby-10.3.1.4
