In [1]:
import os
import pandas as pd
import glob

In [2]:
projects = os.listdir('../projects/')

projects

['commons-numbers',
 'joda-primitives',
 'spring-data-commons',
 'spring-ws',
 'error-prone',
 'spring-data-keyvalue',
 'commons-net',
 'jackson-databind',
 'commons-validator',
 'exp4j',
 'joda-collect',
 'commons-text',
 'jimfs',
 'commons-bcel',
 'commons-dbutils',
 'commons-cli',
 'raml-java-parser',
 'commons-pool',
 'highwheel',
 'joda-time',
 'commons-imaging',
 'commons-codec',
 'joda-money',
 'dagger',
 'closure-stylesheets',
 'orson-charts',
 'zxing',
 'truth',
 'commons-fileupload',
 'jackson-core',
 'commons-configuration',
 'commons-email',
 'undertow']

In [3]:

file_name = 'commons-cli.csv' #.format(project)
df_mutation = pd.read_csv('mutation/{}'.format(file_name))
df_code_smells = pd.read_csv('code-smells/{}'.format(file_name))
df_test_smells = pd.read_csv('test-smells/{}'.format(file_name))
df_ck_metrics = pd.read_csv('ck-metrics/{}'.format(file_name))
df_readability = pd.read_csv('readability/{}'.format(file_name), names=['path_test','readability'], header=None)
print('~~~~~~~~~~~~~~~~~~ {} ~~~~~~~~~~~~~~~~'.format(file_name))
print(df_mutation['path_test'])
print(df_mutation['path_src'])

print(df_readability)

print(pd.merge(df_readability,df_mutation,on='path_test')[['test_name','readability']])


#print(df_ck_metrics)
print(df_code_smells)
#print(df_test_smells)
#print(df_readability)



~~~~~~~~~~~~~~~~~~ commons-cli.csv ~~~~~~~~~~~~~~~~
0     /home/dorma10/mutation-analysis/projects/commo...
1     /home/dorma10/mutation-analysis/projects/commo...
2     /home/dorma10/mutation-analysis/projects/commo...
3     /home/dorma10/mutation-analysis/projects/commo...
4     /home/dorma10/mutation-analysis/projects/commo...
5     /home/dorma10/mutation-analysis/projects/commo...
6     /home/dorma10/mutation-analysis/projects/commo...
7     /home/dorma10/mutation-analysis/projects/commo...
8     /home/dorma10/mutation-analysis/projects/commo...
9     /home/dorma10/mutation-analysis/projects/commo...
10    /home/dorma10/mutation-analysis/projects/commo...
11    /home/dorma10/mutation-analysis/projects/commo...
Name: path_test, dtype: object
0     /home/dorma10/mutation-analysis/projects/commo...
1     /home/dorma10/mutation-analysis/projects/commo...
2     /home/dorma10/mutation-analysis/projects/commo...
3     /home/dorma10/mutation-analysis/projects/commo...
4     /home/dorma10/m

In [4]:
__author__ = "Giovanni Grano"
__license__ = "MIT"
__email__ = "grano@ifi.uzh.ch"

import pandas as pd
import glob
#from effectiveness.settings import *


def process_results(mutation=None, smells=None,
                    ck=None,
                    code_smells=None,
                    readability=None,
                    output=None):
    """
    It aggregates into a single csv file all the metrics about mutation, coverage, smells and ck-metrics
    separately computed

    Aggregates the result from the mutation with the other metrics we compute
    :param mutation: the csv with mutation score and line coverage
    :param smells: the csv with the test smells
    :param ck: the csv with the code metrics
    :param code_smells: the directory that contains the code smells metrics (1 for each project)
    :param readability: the directory that contains the two files for the readability (1 for CUT, 1 for test)
    :param output: the output csv

    """
    if os.path.exists(mutation) and os.path.exists(smells) and os.path.exists(ck) and os.path.exists(code_smells)\
            and os.path.exists(readability):
        print("* Processing {}".format(mutation))
    else:
        print("* One or more input files are missing\nPlease check the previous steps of the pipeline")
        exit(0)

    mutation_frame = pd.read_csv(mutation)
    print("* Number of originally executed mutations = {}".format(len(mutation_frame)))
    mutation_frame = mutation_frame.dropna(subset=['mutation', 'line_coverage'])
    print("* Number of successfully mutation = {}".format(len(mutation_frame)))
    smells_frame = pd.read_csv(smells)
    code_smells_frame = pd.read_csv(code_smells)
    ck_frame = pd.read_csv(ck)

    print("*-------------------------------------------")

    # filter according to the test smells we have
    
    all_tests = smells_frame['class'].tolist()
    filtered_frame = mutation_frame[mutation_frame['test_name'].isin(all_tests)]
    print('* After smells \t{}'.format(len(filtered_frame)))

    print("*-------------------------------------------")

    # filter according to the ck metrics we have
    all_tests = ck_frame['class'].tolist()
    filtered_frame = filtered_frame[filtered_frame['test_name'].isin(all_tests)]
    filtered_frame = filtered_frame[filtered_frame['class_name'].isin(all_tests)]
    print('* After cks \t{}'.format(len(filtered_frame)))
    prod_readability = pd.read_csv(readability, names=['path_src','readability'], header=None)
    prod_readability = pd.merge(prod_readability,mutation_frame,on='path_src')[['class_name','readability']]
    all_classes = prod_readability['class_name'].tolist()
    filtered_frame = filtered_frame[filtered_frame['class_name'].isin(all_classes)]
    print("* After class readability = {}".format(filtered_frame.shape[0]))
    test_readability = pd.read_csv(readability, names=['path_test','readability'], header=None)
    test_readability = pd.merge(test_readability,mutation_frame,on='path_test')[['test_name','readability']]
    all_tests = test_readability['test_name'].tolist()
    filtered_frame = filtered_frame[filtered_frame['test_name'].isin(all_tests)]
    print("* After test readability = {}".format(filtered_frame.shape[0]))

    test_smells_metrics = ['AssertionRoulette', 'EagerTest', 'LazyTest', 'MysteryGuest',
                           'SensitiveEquality', 'ResourceOptimism', 'ForTestersOnly',
                           'IndirectTesting']
    code_ck_metrics = ['LOC', 'HALSTEAD', 'RFC', 'CBO', 'MPC', 'IFC', 'DAC', 'DAC2', 'LCOM1',
                       'LCOM2', 'LCOM3', 'LCOM4', 'CONNECTIVITY', 'LCOM5', 'COH', 'TCC', 'LCC', 'ICH',
                       'WCM', 'NOA', 'NOPA', 'NOP', 'McCABE'] #, 'BUSWEIMER']
    code_smells_metrics = ['csm_CDSBP', 'csm_CC', 'csm_FD', 'csm_Blob', 'csm_SC', 'csm_MC', 'csm_LM', 'csm_FE']

    print("*-------------------------------------------")
    print("* Processing test smells:")
    for smell in test_smells_metrics:
        print("- Processing {}".format(smell))
        filtered_frame[smell] = filtered_frame.apply(lambda x: get_smell_value(x, smells_frame, smell), axis=1)

    print("*-------------------------------------------")
    print("* Processing ck metric for production:")
    for metric in code_ck_metrics:
        print("- Processing {}".format(metric))
        filtered_frame[metric+"_prod"] = filtered_frame.apply(lambda x: get_ck_value(x,
                                                                                     ck_frame,
                                                                                     metric),
                                                              axis=1)

    print("*-------------------------------------------")
    print("* Processing ck metric for tests:")
    for metric in code_ck_metrics:
        print("- Processing {}".format(metric))
        filtered_frame[metric+"_test"] = filtered_frame.apply(lambda x: get_ck_value(x,
                                                                                     ck_frame,
                                                                                     metric,
                                                                                     'test_name'),
                                                              axis=1)

    print("*-------------------------------------------")
    print("* Processing code smells for productions:")
    
    for smell in code_smells_metrics:
        print("- Processing {}".format(smell))
        filtered_frame[smell] = filtered_frame.apply(lambda x: get_smell_value(x, code_smells_frame, smell, key='class_name'), axis=1)

    #files = glob.glob(code_smells+'/*.csv')
    #code_smell_frame = pd.concat([pd.read_csv(f) for f in files])
    
    #for metric in code_smells_metrics:
    #    print("- Processing {}".format(metric))
    #    filtered_frame[metric] = filtered_frame.apply(lambda x: get_production_code_smell(x, code_smell_frame, metric),
     #                                                 axis=1)

    print("*-------------------------------------------")
    print("* Processing readability:")
    print("- Processing production readability")
    filtered_frame['prod_readability'] = filtered_frame.apply(lambda x: get_readability(x, prod_readability),
                                                              axis=1)
    print("- Processing test readability")
    filtered_frame['test_readability'] = filtered_frame.apply(lambda x: get_readability(x, test_readability, key2='test_name'),
                                                              axis=1)

    print("*-------------------------------------------")
    print("* Saving the aggregate in {}".format(output))
    filtered_frame.to_csv(output, index=False)


def get_process_metric(row, flag, ck_frame, metric, verbose=False):
    """
    Returns the value for the given metric for a particular source
    :param row: the data frame row for the test
    :param flag: true if that's a production class; false if a test
    :param ck_frame: the frame with all the ck metrics
    :param metric: the given metric

    """
    if flag:
        name = 'class_name'
    else:
        name = 'test_name'
    cut = row[name]

    aux = ck_frame[ck_frame['class'] == cut][metric]
    if len(aux) != 1 and verbose:
        print("\t* Two entries for {}".format(cut))
    return aux.iloc[0]


def get_readability(row, frame, key='class_name', key2='class_name', verbose=False):
    """
    Returns the readability for the given production class or test
    :param row: the row of the original frame
    :param frame: the readability fame
    :param key: the key in the passed frame
    """
    cut = row[key2]
    aux = frame[frame[key2] == cut]['readability']
    if len(aux) != 1 and verbose:
        print("\t* {} entries for {}".format(str(len(aux)), cut))
    return aux.iloc[0]


def get_production_code_smell(row, frame, smell, key='class_name', verbose=False):
    """
    Returns the value for the given code smell for a production class
    :param row:
    :param frame:
    :param smell:
    :param key:
    """
    cut = row[key]
    aux = frame[frame['className'] == cut][smell]
    if len(aux) != 1 and verbose:
        print("\t* {} entries for {}".format(str(len(aux)), cut))
    return int(aux.iloc[0])


def get_smell_value(row, smells_frame, smell, key='test_name', verbose=False):
    """
    Returns the value for the given smell for a particular test
    :param row: the data frame row for the test
    :param smells_frame: the frame with all the smells
    :param smell: the kind of smell
    :param key: the key to look for in the frame
    """
    cut = row[key]
    aux = smells_frame[smells_frame['class'] == cut][smell]
    if len(aux) != 1 and verbose:
        print("\t* {} entries for {}".format(str(len(aux)), cut))
    return aux.iloc[0]


def get_ck_value(row, ck_frame, metric, key='class_name', verbose=False):
    """
    Returns the value for the given metric for a particular source
    :param row: the data frame row for the test
    :param ck_frame: the frame with all the ck metrics
    :param metric: the given metric
    :param key: the key for the metric

    """
    cut = row[key]
    
    aux = ck_frame[ck_frame['class'] == cut][metric]
    if len(aux) != 1 and verbose:
        print("\t* {} entries for {}".format(str(len(aux)), cut))
    return aux.iloc[0]


def separate_sets(complete_frame='complete_fame_extnsion.csv', delimiter='quartile',
                  name_good='good_tests', name_bad='bad_tests'):
    """
    It separates
    :param complete_frame: the frame to read with the metrics
    :param delimiter: the valued used to split the sets
    :param name_good: the name for the frame with the effective tests
    :param name_bad: the name for the frame with the non effective tests
    :return:
    """
    frame = pd.read_csv(complete_frame)
    median = frame.mutation.median()
    quantiles = frame.mutation.quantile([0.25, 0.75])
    lower_quantile = quantiles[0.25]
    upper_quantile = quantiles[0.75]

    if delimiter == 'quartile':
        bad_tests = frame[frame['mutation'] <= lower_quantile]
        good_tests = frame[frame['mutation'] >= upper_quantile]
        bad_tests.to_csv('{}.csv'.format(name_good), index=False)
        good_tests.to_csv('{}.csv'.format(name_bad), index=False)
        print("* Good tests quantile = {}".format(len(good_tests)))
        print("* Bad tests quantile = {}".format(len(bad_tests)))
    else:
        bad_tests = frame[frame['mutation'] <= median]
        good_tests = frame[frame['mutation'] > median]
        bad_tests.to_csv('{}_median.csv'.format(name_good), index=False)
        good_tests.to_csv('{}_median.csv'.format(name_bad), index=False)
        print("* Good tests median = {}".format(len(good_tests)))
        print("* Bad tests median = {}".format(len(bad_tests)))


def count_smells(complete_frame='merge.csv'):
    """
    Prints the number of the detected smells into the dataset
    :param complete_frame: the csv file to read
    """
    frame = pd.read_csv(complete_frame)
    test_smells_metrics = ['isAssertionRoulette', 'isEagerTest', 'isLazyTest', 'isMysteryGuest',
                           'isSensitiveEquality', 'isResourceOptimism', 'isForTestersOnly',
                           'isIndirectTesting']
    overall = 0
    for metric in test_smells_metrics:
        counts = frame[metric].sum()
        overall = overall + counts
        print('{} = {}'.format(metric, counts))
    print('Overall = {}'.format(overall))

    code_smells_metrics = ['csm_CDSBP', 'csm_CC', 'csm_FD', 'csm_Blob', 'csm_SC', 'csm_MC', 'csm_LM', 'csm_FE']

    overall = 0
    for metric in code_smells_metrics:
        counts = frame[metric].sum()
        overall = overall + counts
        print('{} = {}'.format(metric, counts))
    print('Overall = {}'.format(overall))


#if __name__ == '__main__':

    #for operator in ALL_OPERATORS:
    
    
    '''
        file_name = '{}.csv'.format(project)
    df_mutation = pd.read_csv()
    df_code_smells = pd.read_csv('code-smells/{}'.format(file_name))
    df_test_smells = pd.read_csv('test-smells/{}'.format(file_name))
    df_ck_metrics = pd.read_csv('ck-metrics/{}'.format(file_name))
    df_readability = pd.read_csv('readability/{}'.format(file_name))
    print('~~~~~~~~~~~~~~~~~~ {} ~~~~~~~~~~~~~~~~'.format(project))
    print(df_mutation)
    print(df_ck_metrics)
    print(df_code_smells)
    print(df_test_smells)
    print(df_readability)
    
    
    '''
    


   # separate_sets(complete_frame=METRICS_DIR+'/merge-{}.csv'.format(project),
   #               name_good='good_tests-{}'.format(project),
   #               name_bad='bad_tests-{}'.format(project))
    # process_results()
    # separate_sets()


In [5]:
projects = os.listdir('../projects/')
for project in projects:
    print('~~~~~~~~~~~~~~~~~~~~ Aggregating {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~'.format(project))
    project = '{}.csv'.format(project)
    process_results(mutation='mutation/{}'.format(project),
                    smells='test-smells/{}'.format(project),
                    ck='ck-metrics/{}'.format(project),
                    code_smells='code-smells/{}'.format(project),
                    readability='readability/{}'.format(project),
                    output='merged/merge-{}'.format(project))

~~~~~~~~~~~~~~~~~~~~ Aggregating commons-numbers ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Processing mutation/commons-numbers.csv
* Number of originally executed mutations = 40
* Number of successfully mutation = 40
*-------------------------------------------
* After smells 	40
*-------------------------------------------
* After cks 	40
* After class readability = 40
* After test readability = 40
*-------------------------------------------
* Processing test smells:
- Processing AssertionRoulette
- Processing EagerTest
- Processing LazyTest
- Processing MysteryGuest
- Processing SensitiveEquality
- Processing ResourceOptimism
- Processing ForTestersOnly
- Processing IndirectTesting
*-------------------------------------------
* Processing ck metric for production:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CON

- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing code smells for productions:
- Processing csm_CDSBP
- Processing csm_CC
- Processing csm_FD
- Processing csm_Blob
- Processing csm_SC
- Processing csm_MC
- Processing csm_LM
- Processing csm_FE
*-------------------------------------------
* Processing readability:
- Processing production readability
- Processing test readability
*-------------------------------------------
* Saving the aggregate in merged/merge-spring-ws.csv
~~~~~~~~~~~~~~~~~~~~ Aggregating error-prone ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Processing mutation/error-prone.csv
* Number of originally executed mutations = 59
* Number of successfully mutation = 59
*-------------------------------------------
* After smells 	58
*-------------------------------------------
* After cks 	58
* After class readability = 58
* After test readability = 58
*-------------------------------------------
* Processing test smells:
- Processing Assertion

- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing code smells for productions:
- Processing csm_CDSBP
- Processing csm_CC
- Processing csm_FD
- Processing csm_Blob
- Processing csm_SC
- Processing csm_MC
- Processing csm_LM
- Processing csm_FE
*-------------------------------------------
* Processing readability:
- Processing production readability
- Processing test readability
*-------------------------------------------
* Saving the aggregate in merged/merge-jackson-databind.csv
~~~~~~~~~~~~~~~~~~~~ Aggregating commons-validator ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Processing mutation/commons-validator.csv
* 

- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing ck metric for tests:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing code smells for productions:
- Processing csm_CDSBP
- Processing csm_CC
- Processing csm_FD
- Processing csm_Blob
- Processing csm

- Processing IndirectTesting
*-------------------------------------------
* Processing ck metric for production:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing ck metric for tests:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Pro

- Processing EagerTest
- Processing LazyTest
- Processing MysteryGuest
- Processing SensitiveEquality
- Processing ResourceOptimism
- Processing ForTestersOnly
- Processing IndirectTesting
*-------------------------------------------
* Processing ck metric for production:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing ck metric for tests:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processi

*-------------------------------------------
* After smells 	6
*-------------------------------------------
* After cks 	6
* After class readability = 6
* After test readability = 6
*-------------------------------------------
* Processing test smells:
- Processing AssertionRoulette
- Processing EagerTest
- Processing LazyTest
- Processing MysteryGuest
- Processing SensitiveEquality
- Processing ResourceOptimism
- Processing ForTestersOnly
- Processing IndirectTesting
*-------------------------------------------
* Processing ck metric for production:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Processing DAC2
- Processing LCOM1
- Processing LCOM2
- Processing LCOM3
- Processing LCOM4
- Processing CONNECTIVITY
- Processing LCOM5
- Processing COH
- Processing TCC
- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*----------------------

- Processing test readability
*-------------------------------------------
* Saving the aggregate in merged/merge-zxing.csv
~~~~~~~~~~~~~~~~~~~~ Aggregating truth ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Processing mutation/truth.csv
* Number of originally executed mutations = 37
* Number of successfully mutation = 37
*-------------------------------------------
* After smells 	37
*-------------------------------------------
* After cks 	37
* After class readability = 37
* After test readability = 37
*-------------------------------------------
* Processing test smells:
- Processing AssertionRoulette
- Processing EagerTest
- Processing LazyTest
- Processing MysteryGuest
- Processing SensitiveEquality
- Processing ResourceOptimism
- Processing ForTestersOnly
- Processing IndirectTesting
*-------------------------------------------
* Processing ck metric for production:
- Processing LOC
- Processing HALSTEAD
- Processing RFC
- Processing CBO
- Processing MPC
- Processing IFC
- Processing DAC
- Proc

- Processing LCC
- Processing ICH
- Processing WCM
- Processing NOA
- Processing NOPA
- Processing NOP
- Processing McCABE
*-------------------------------------------
* Processing code smells for productions:
- Processing csm_CDSBP
- Processing csm_CC
- Processing csm_FD
- Processing csm_Blob
- Processing csm_SC
- Processing csm_MC
- Processing csm_LM
- Processing csm_FE
*-------------------------------------------
* Processing readability:
- Processing production readability
- Processing test readability
*-------------------------------------------
* Saving the aggregate in merged/merge-commons-configuration.csv
~~~~~~~~~~~~~~~~~~~~ Aggregating commons-email ~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Processing mutation/commons-email.csv
* Number of originally executed mutations = 14
* Number of successfully mutation = 14
*-------------------------------------------
* After smells 	14
*-------------------------------------------
* After cks 	14
* After class readability = 14
* After test readabil

In [6]:
all_filenames = [i for i in glob.glob("merged/*.csv")]
combined_csv_data = pd.concat([pd.read_csv(f) for f in all_filenames])
combined_csv_data

Unnamed: 0,project,module,commit,path_test,test_name,path_src,class_name,mutation,no_mutations,line_coverage,...,csm_CDSBP,csm_CC,csm_FD,csm_Blob,csm_SC,csm_MC,csm_LM,csm_FE,prod_readability,test_readability
0,raml-java-parser,,60639a3b22245d8bd5c79a2fcd09a56428612332,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.rules.UriParametersRuleTestCase,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.rule.UriParametersRule,0.531250,128.0,0.709677,...,False,False,False,False,False,False,False,False,0.643899,0.182479
1,raml-java-parser,,60639a3b22245d8bd5c79a2fcd09a56428612332,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.rules.SchemaRuleTestCase,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.rule.SchemaRule,0.656250,320.0,0.714286,...,False,False,False,False,False,True,False,True,0.623326,0.474510
2,raml-java-parser,,60639a3b22245d8bd5c79a2fcd09a56428612332,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.loader.DefaultResourceLoaderTe...,/home/dorma10/mutation-analysis/projects/raml-...,org.raml.parser.loader.DefaultResourceLoader,0.878049,41.0,1.000000,...,False,False,False,False,False,False,False,False,0.644335,0.669817
0,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.SimpleObjectPoolTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.SimpleObjectPool,0.000000,24.0,0.000000,...,False,False,False,False,False,False,False,False,0.551469,0.411308
1,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.SameSiteNoneIncompatibleClien...,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.SameSiteNoneIncompatibleClien...,0.699686,636.0,0.901961,...,False,False,False,False,False,False,False,False,0.748899,0.331151
2,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.LocaleUtilsTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.LocaleUtils,0.117647,51.0,0.105263,...,False,False,False,False,False,False,False,False,0.597005,0.710809
3,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.PathMatcherTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.PathMatcher,0.518625,349.0,0.698795,...,False,False,False,False,False,False,False,False,0.766056,0.868378
4,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HeaderTokenParserTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HeaderTokenParser,0.662083,509.0,0.836066,...,False,False,False,False,False,False,False,True,0.393627,0.105674
5,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HttpStringTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HttpString,0.462926,998.0,0.436975,...,False,False,False,False,False,False,False,False,0.856083,0.682064
6,undertow,core,483953cbf7691df2b5eb591d0fdc03e91b24d094,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HeaderMapTestCase,/home/dorma10/mutation-analysis/projects/under...,io.undertow.util.HeaderMap,0.357334,2611.0,0.472554,...,False,False,False,True,False,False,False,False,0.616741,0.480069


In [7]:
combined_csv_data.to_csv('complete_fame_extnsion.csv')

In [8]:
separate_sets(complete_frame='complete_fame_extnsion.csv')

* Good tests quantile = 346
* Bad tests quantile = 346
