In [1]:
# Turn off Jedi-based tab completion in IPython's completer.
%config Completer.use_jedi = False
import sys
# Put the parent directory on the module search path
# (presumably so the `checklist` package imported below resolves — TODO confirm).
sys.path.append('../')

In [4]:
from checklist.test_suite import TestSuite
import pandas as pd

# None removes the column-width limit, so long text cells are displayed in full
# instead of being truncated with an ellipsis.
pd.set_option('display.max_colwidth', None)

def load_suite(path):
    """Load a saved suite and build its visual summary table.

    Args:
        path: Location of the serialized suite; passed straight to
            ``TestSuite.from_file``.

    Returns:
        A ``(suite, summary_table)`` tuple: the loaded suite and the object
        returned by its ``visual_summary_table()`` method.
    """
    loaded = TestSuite.from_file(path)
    return loaded, loaded.visual_summary_table()


def proccess_summary_do_dataframe(suite, summary_table):
    """Flatten a suite's visual summary into one DataFrame row per test case.

    Args:
        suite: Object exposing ``visual_summary_by_test(name)``, whose result
            has a ``filtered_testcases`` iterable.
        summary_table: Object whose ``get_state()['test_infos']`` is a list of
            dicts carrying a ``'name'`` entry.

    Returns:
        A ``(df, templates)`` tuple. ``df`` has the columns ``text``,
        ``label``, ``pred``, ``succeed`` and ``template``; ``templates`` lists
        the template name of every test, in summary order.

    Raises:
        IndexError: If a test name contains no ``' - '`` separator.
    """
    column_names = ['text', 'label', 'pred', 'succeed', 'template']
    rows = []
    templates = []

    for info in summary_table.get_state()['test_infos']:
        full_name = info['name']
        # The template name is the second ' - '-separated segment of the test name.
        short_name = full_name.split(' - ')[1]
        cases = suite.visual_summary_by_test(full_name).filtered_testcases
        templates.append(short_name)

        for case in cases:
            # Only the first example of each test case is exported.
            first = case['examples'][0]
            generated = first['new']
            text, pred = generated['text'], generated['pred']
            label, succeed = first['label'], first['succeed']
            rows.append([text, label, pred, succeed, short_name])

    return pd.DataFrame(rows, columns=column_names), templates
    
    
def sample_df(df_template, total_samples, df_total_size, random_state=None):
    """Draw a template's proportional share of a sampling budget.

    The share is ``total_samples * len(df_template) / df_total_size`` rounded
    up, with at least one row for any non-empty template and never more rows
    than the template actually has.

    Args:
        df_template: DataFrame holding the rows of a single template.
        total_samples: Overall number of rows wanted across all templates.
        df_total_size: Number of rows in the frame ``df_template`` was taken
            from (the denominator of the proportion).
        random_state: Optional seed / generator forwarded to
            ``DataFrame.sample`` for reproducible draws. ``None`` (default)
            keeps the previous unseeded behaviour.

    Returns:
        A DataFrame with the sampled rows; empty if ``df_template`` is empty.

    Raises:
        ZeroDivisionError: If ``df_total_size`` is 0 while ``df_template`` is
            non-empty (inconsistent arguments).
    """
    # Local import so the helper does not depend on a later cell having
    # imported `ceil` into the notebook namespace.
    from math import ceil

    population = len(df_template)
    if population == 0:
        # Nothing to draw; also avoids 0 / 0 when the whole class is empty.
        return df_template.iloc[:0]

    # Multiply before dividing: `100 * (7 / 100)` evaluates to
    # 7.000000000000001 and would be rounded up to 8, whereas
    # `100 * 7 / 100` is exactly 7.0.
    quota = ceil(total_samples * population / df_total_size)

    # At least one row per non-empty template, but never more than available
    # (sampling without replacement raises if asked for more).
    quota = min(max(1, quota), population)

    return df_template.sample(quota, random_state=random_state)

In [5]:
# Keep `ceil` in the notebook namespace; helpers defined in earlier cells may
# look it up globally.
from math import ceil
import shutil
from pathlib import Path

# Recreate the output directory from scratch. Done in Python rather than with
# `!rmdir /s /q` / `!mkdir`, which only work in a Windows shell and complain
# when the directory is missing / already present.
OUTPUT_DIR = Path('test_cases')
if OUTPUT_DIR.exists():
    shutil.rmtree(OUTPUT_DIR)
OUTPUT_DIR.mkdir(parents=True)

approachs = ['approach1','approach2','approach3','approach4','approach5','random']

# Target number of samples for each approach, as (FAILING, PASSING).
# The exported totals can be slightly higher: each template's share is rounded
# up and every non-empty template contributes at least one row.
all_total_samples = [
    (220, 232), # Approach1
    (224, 231), # Approach2
    (96, 191),  # Approach3
    (91, 186),  # Approach4
    (155, 217), # Approach5
    (100, 144)] # Random

# zip() would silently drop entries if the two lists ever got out of sync.
assert len(approachs) == len(all_total_samples), 'one (FAILING, PASSING) pair is required per approach'

# The `template` column is redundant in the workbook: each template gets its own sheet.
columns = ['text', 'label', 'pred', 'succeed']

for appr, total_samples in zip(approachs, all_total_samples):
    suite, tb = load_suite(f'./suites/posneg-{appr}.suite')
    df, templates = proccess_summary_do_dataframe(suite, tb)
    print(f'Proccessing {appr}...')

    filename = f'test_cases/{appr}.xlsx'

    # Split by outcome: succeed == 0 are failing cases, succeed == 1 passing ones.
    df0 = df[df['succeed'] == 0]
    df1 = df[df['succeed'] == 1]

    # Running totals of exported rows per outcome, reported after the workbook is written.
    s0 = 0
    s1 = 0
    with pd.ExcelWriter(filename) as writer:
        for template in templates:
            df_template0 = df0[df0['template'] == template]
            df_template1 = df1[df1['template'] == template]

            # Stratified sampling: each template receives a share of the
            # budget proportional to its size within the outcome class.
            # NOTE: the draw is unseeded, so the selected rows differ between runs.
            sampled0 = sample_df(df_template0, total_samples[0], len(df0))
            sampled1 = sample_df(df_template1, total_samples[1], len(df1))

            df_filtered = pd.concat([sampled0, sampled1], axis=0)

            df_filtered.to_excel(writer, sheet_name=template, index=False, columns=columns)
            s0 += len(sampled0)
            s1 += len(sampled1)

    print(appr, f'samples: (class 0: {s0}, class 1: {s1})')

Please wait as we prepare the table data...
Proccessing approach1...
approach1 samples: (class 0: 232, class 1: 247)
Please wait as we prepare the table data...
Proccessing approach2...
approach2 samples: (class 0: 241, class 1: 247)
Please wait as we prepare the table data...
Proccessing approach3...
approach3 samples: (class 0: 100, class 1: 199)
Please wait as we prepare the table data...
Proccessing approach4...
approach4 samples: (class 0: 94, class 1: 194)
Please wait as we prepare the table data...
Proccessing approach5...
approach5 samples: (class 0: 163, class 1: 232)
Please wait as we prepare the table data...
Proccessing random...
random samples: (class 0: 105, class 1: 154)
