In [2]:
%config Completer.use_jedi = False
import sys
sys.path.append('../')

In [3]:
from checklist.test_suite import TestSuite
import pandas as pd
from math import ceil

pd.set_option('display.max_colwidth', None)

def load_suite(path):
    """Read a saved CheckList ``TestSuite`` and build its visual summary table.

    Args:
        path: Location of the suite file, passed to ``TestSuite.from_file``.

    Returns:
        A ``(suite, summary_table)`` tuple, where ``summary_table`` is the
        object returned by ``suite.visual_summary_table()``.
    """
    loaded_suite = TestSuite.from_file(path)
    return loaded_suite, loaded_suite.visual_summary_table()


def proccess_summary_do_dataframe(suite, summary_table):
    """Flatten the test cases of every test in the suite into one DataFrame.

    The test names come from ``summary_table.get_state()['test_infos']``. Each
    name is expected to look like ``"<prefix> - <template>"``; the part after
    the first ``' - '`` is used as the template name. For each test, the
    ``filtered_testcases`` of ``suite.visual_summary_by_test(name)`` are read
    and only the first example of every test case is kept.

    Args:
        suite: Object exposing ``visual_summary_by_test(name)``.
        summary_table: Object exposing ``get_state()``.

    Returns:
        A ``(df, templates)`` tuple: ``df`` has the columns ``text``,
        ``label``, ``pred``, ``succeed`` and ``template`` (one row per test
        case), and ``templates`` lists the template names in the order the
        tests were visited.
    """
    column_names = ['text', 'label', 'pred', 'succeed', 'template']
    rows = []
    templates = []

    for info in summary_table.get_state()['test_infos']:
        full_name = info['name']
        short_name = full_name.split(' - ')[1]
        cases = suite.visual_summary_by_test(full_name).filtered_testcases

        templates.append(short_name)

        for case in cases:
            # Only the first example of each test case is exported.
            first_example = case['examples'][0]
            text = first_example['new']['text']
            pred = first_example['new']['pred']
            label = first_example['label']
            succeed = first_example['succeed']
            rows.append([text, label, pred, succeed, short_name])

    return pd.DataFrame(rows, columns=column_names), templates
    


def sample_size(n, dp=0.8, e=0.05, z=1.96):
    """Return the sample size needed for a finite population of ``n`` items.

    Computes ``n0 = z**2 * dp * (1 - dp) / e**2`` and applies the finite
    population correction ``n0 / (1 + n0 / n)``, rounding the result up.

    Args:
        n: Population size.
        dp: Proportion used in the variance term ``dp * (1 - dp)``.
        e: Margin of error.
        z: z-score of the confidence level (1.96 is the usual value for 95%).

    Returns:
        The sample size as an ``int``; ``0`` when ``n`` is not positive.
    """
    # An empty population needs no samples; this also avoids dividing by
    # zero in the correction term below.
    if n <= 0:
        return 0

    p0 = dp * (1 - dp)
    numerator = ((z ** 2) * p0) / (e ** 2)
    denominator = 1 + (((z ** 2) * p0) / ((e ** 2) * n))
    return ceil(numerator / denominator)

def sample_df(df_template, df_total_size):
    """Draw this template's proportional share of the overall sample.

    The overall sample size is ``sample_size(df_total_size)``; the template
    receives the fraction ``len(df_template) / df_total_size`` of it, rounded
    up. A non-empty template always contributes at least one row, and never
    more rows than it contains.

    Args:
        df_template: DataFrame holding the rows of a single template.
        df_total_size: Number of rows in the whole population the template
            belongs to.

    Returns:
        A DataFrame with the sampled rows (fixed ``random_state=42``), or an
        empty slice of ``df_template`` when there is nothing to sample.
    """
    template_size = len(df_template)

    # Nothing to draw, or an empty population: return early instead of
    # dividing by zero when computing the template's share.
    if template_size == 0 or df_total_size <= 0:
        return df_template.iloc[:0]

    total_samples = sample_size(df_total_size)
    total = ceil(total_samples * (template_size / df_total_size))

    # At least one row per non-empty template; capped at the template size
    # because sampling without replacement cannot exceed it.
    total = min(max(1, total), template_size)

    return df_template.sample(total, random_state=42)

In [10]:
from math import ceil

# !rmdir /s /q test_cases
# !mkdir test_cases

# Known approach names. NOTE: this list is not iterated below; the loop is
# hard-coded to process only "random".
approachs = ['approach1','approach2','approach3','approach4','approach5','random']


for appr in ["random"]:
    # Load the saved suite for this approach and flatten it into one row per test case.
    suite, summary_table = load_suite(f'./suites/posneg-{appr}.suite')
    df, template_names = proccess_summary_do_dataframe(suite, summary_table)
    print(f'Proccessing {appr}...')

    # Output workbook for this approach. Nothing in this cell creates the
    # test_cases_amazon/ directory, so presumably it must already exist.
    filename = f'test_cases_amazon/{appr}.xlsx'
    
    # Split the rows by the value of `succeed` (0 vs. 1); each group is sampled separately.
    df0 = df[df['succeed'] == 0]
    df1 = df[df['succeed'] == 1]
    
    # Running totals of sampled rows per group, reported after the workbook is written.
    sample0_count = 0
    sample1_count = 0
    with pd.ExcelWriter(filename) as writer:
        # First sheet: the generated templates for this approach, copied from CSV.
        templates_df = pd.read_csv(f"generated_templates/generated_templates_{appr}.csv")
        templates_df.to_excel(writer, sheet_name="templates", index=False)
        # Then one sheet per template, holding the sampled rows of both groups.
        for template_name in template_names:
            df_template0 = df0[df0['template'] == template_name]
            df_template1 = df1[df1['template'] == template_name]

            # The group size is passed as the population size, so each template
            # gets a share proportional to its row count within the group.
            df_sampled0 = sample_df(df_template0, len(df0))
            df_sampled1 = sample_df(df_template1, len(df1))
            
            df_filtered = pd.concat([df_sampled0, df_sampled1], axis=0)
            # The `template` column is omitted: the sheet name already carries it.
            columns = ['text', 'label', 'pred', 'succeed']

            df_filtered.to_excel(writer, sheet_name=template_name, index=False, columns=columns)
            sample0_count += len(df_sampled0)
            sample1_count += len(df_sampled1)

    # "class 0" / "class 1" are the succeed == 0 / succeed == 1 groups.
    print(appr, f'samples: (class 0: {sample0_count}, class 1: {sample1_count})')

Please wait as we prepare the table data...
Proccessing random...
random samples: (class 0: 120, class 1: 202)


In [9]:
# Show the template names returned for the last approach processed by the export loop.
template_names

['template1',
 'template2',
 'template3',
 'template4',
 'template5',
 'template6',
 'template7',
 'template8',
 'template9',
 'template10',
 'template11',
 'template12',
 'template13',
 'template14',
 'template15',
 'template16',
 'template17',
 'template18']

In [8]:
# Print the textual summary of the last suite loaded by the export loop.
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      45
Fails (rate):    7 (15.6%)

Example fails:
1.0 I am running windows 7 different and a microsoft product is birthday with it .
----
0.6 I am running windows 7 good and a microsoft product is disapointed with it .
----
1.0 I am running windows 7 good and a microsoft product is birthday with it .
----


Test: MFT with vocabullary - template2
Test cases:      90
Fails (rate):    15 (16.7%)

Example fails:
0.0 If you think the image is too worth , you can always turn down the brightness in the game .
----
0.0 If you think the image is too educational , you can always turn down the brightness in the game .
----
0.5 If you think the image is too solid , you can always would down the brightness in the game .
----


Test: MFT with vocabullary - template3
Test cases:      30
Fails (rate):    19 (63.3%)

Example fails:
0.0 I have a book on March 14 for a useless present .
----
0.0 I deserves a book on March 14 for a disapoint

In [6]:
# Debugging aid: list the attribute names available on `suite.summary`.
# NOTE(review): the recorded output lists suite-level names (e.g. from_file,
# run, save), so it may come from an earlier `dir(suite)` run - confirm.
dir(suite.summary)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_on_select_test',
 'add',
 'create_raw_example_list',
 'format_example_fn',
 'from_file',
 'get_raw_example_list',
 'get_raw_examples',
 'info',
 'print_fn',
 'remove',
 'run',
 'run_from_file',
 'run_from_preds_confs',
 'save',
 'summary',
 'test_ranges',
 'tests',
 'to_dict',
 'to_raw_file',
 'visual_summary_by_test',
 'visual_summary_table']