### Missing test step analysis through association rules

In this notebook, we experiment with frequent itemset mining and association rule mining to discover patterns of occurrence of test steps and to recommend test steps that are potentially missing from new test case descriptions.

In [27]:
# Import necessary libraries
import random
import os
import pandas as pd
import re
import numpy as np
import json
import statistics as st
import nltk
from nltk.tokenize import TweetTokenizer, RegexpTokenizer, sent_tokenize, word_tokenize
from nltk.corpus import stopwords 
from nltk.stem import WordNetLemmatizer 
from nltk.util import ngrams
import math
from statistics import mean
import string
from collections import defaultdict

import collections
from pprint import pprint
from pathlib import Path
from typing import Iterator
import itertools
from tqdm import tqdm
import copy

from expects import (contain_exactly, equal, expect, have_keys)
import attr
from functools import partial
from tabulate import tabulate

import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.offline as offline
import plotly.io as pio
import plotly.graph_objects as go

from spacy.util import minibatch, compounding
import random
import spacy

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import time

from efficient_apriori import apriori
import json

import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import Markdown as md

#### Load pre-processed data
For the missing test step module based on association rules, we apply all the default pre-processing steps.

In [None]:
# Load all the existing test cases and further select only the test steps to build the association rules
# The path is relative to the notebook's working directory.
# NOTE(review): pd.read_pickle unpickles the file, and unpickling can execute arbitrary code,
# so this file must come from a trusted source.
existing_test_cases_path = 'existing_test_cases.pkl'
existing_test_cases_df = pd.read_pickle(existing_test_cases_path)

In [None]:
# Quick visual check of the loaded test cases: as the last expression of the cell,
# the DataFrame is rendered as the cell output.
existing_test_cases_df

In [None]:
# Load the trained Named Entity Recognition (NER) model
# 'custom_ner/model' is resolved relative to the working directory; the resulting `nlp`
# pipeline is applied to the text of every test step when entity names are replaced
# by their entity type.
nlp = spacy.load('custom_ner/model')

In [None]:
# Replace names of game entities by their entity type (e.g., replace the name of a game zone by 'game_zone')
# NOTE: this cell rewrites the 'Step' column in place, so it is not idempotent:
# re-running it feeds already-replaced text to the NER model again.
for index, row in existing_test_cases_df.iterrows():
    # 'Step' holds a list of tokens; the NER model works on the joined text
    current_step = ' '.join(row['Step'])
    step_text = nlp(current_step)

    # Substitute from the last entity to the first. ent.start_char / ent.end_char are
    # offsets into the ORIGINAL text, so replacing an earlier entity first would shift
    # every later offset whenever the label and the entity text differ in length,
    # corrupting the step. Going right-to-left keeps the remaining offsets valid.
    # When no entity is found the loop body never runs and the step is left unchanged.
    for ent in reversed(step_text.ents):
        current_step = current_step[:ent.start_char] + ent.label_ + current_step[ent.end_char:]

    # Update test step: write back by index label and column name, so the update does not
    # depend on the frame having a 0..n-1 index or on 'Step' being the seventh column.
    existing_test_cases_df.at[index, 'Step'] = current_step.split()

#### Build and evaluate association rules

In [None]:
# Unique test case keys, in order of first appearance in the data frame.
# list(set(...)) was replaced because a set has no defined order (and string hashing is
# randomised per interpreter session), which made "the first 2,500 test cases" used for
# the train/test split below arbitrary and not reproducible across kernel restarts.
all_test_case_keys = existing_test_cases_df['Key'].drop_duplicates().tolist()

In [None]:
# Iterate through test cases to build and evaluate association rules
# Start with the first 2,500 test cases to build rules and evaluate on the 2,501st,
# then add the 2,501st to the training set and repeat with the next test case
# Minimum confidence = 1 and lift above 1

# Confidence can never exceed 1, so ">= 1.0" selects exactly the rules with confidence == 1
min_confidence = 1.0
results_path = 'missing_step_evaluation.txt'

test_case_accuracy_dict = {}
te = TransactionEncoder()

# Clamp the upper bound so that a shorter key list cannot cause an IndexError
last_test_key_index = min(3322, len(all_test_case_keys))

for test_key_index in tqdm(range(2500, last_test_key_index)):
    rule_list = []
    correct_case = 0
    wrong_case = 0
    accuracy_step_list = []

    # Train on every key BEFORE test_key_index and evaluate on the key AT test_key_index.
    # (Using test_key_index + 1 skipped one test case: it was added to the training set
    # without ever being evaluated, and the last iteration could run past the list end.)
    training_set_keys = all_test_case_keys[:test_key_index]
    testing_set_key = all_test_case_keys[test_key_index]

    existing_test_cases_training_df = existing_test_cases_df[existing_test_cases_df['Key'].isin(training_set_keys)]

    # Build association rules with training set.
    # A single pass assigns each distinct step (tuple of tokens) an integer identifier in
    # order of first appearance and groups the identifiers per test case (one transaction each).
    test_step_identifier_dict = {}
    test_case_transactions_dict = {}
    for step_tokens, test_case_key in zip(existing_test_cases_training_df['Step'],
                                          existing_test_cases_training_df['Key']):
        step = tuple(step_tokens)
        step_identifier = test_step_identifier_dict.setdefault(step, len(test_step_identifier_dict))
        test_case_transactions_dict.setdefault(test_case_key, []).append(step_identifier)

    transactions_list = list(test_case_transactions_dict.values())

    te_ary = te.fit(transactions_list).transform(transactions_list)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # Get frequent sets of test steps (min support = 0.005)
    frequent_itemsets = fpgrowth(df, min_support=0.005, use_colnames=True)

    # Index the frequent itemsets once per training set: support by itemset for direct
    # lookups, and itemsets grouped by size so only candidates of the right size are scanned.
    support_by_itemset = {}
    itemsets_by_size = {}
    for itemset, support in zip(frequent_itemsets['itemsets'], frequent_itemsets['support']):
        support_by_itemset[itemset] = support
        itemsets_by_size.setdefault(len(itemset), []).append(itemset)

    step_list_complete = existing_test_cases_df[existing_test_cases_df['Key']==testing_set_key]['Step'].tolist()

    # Leave-one-out over the steps of the test case: hide step i and check whether the
    # rules built from the remaining steps recommend it back.
    for i in range(len(step_list_complete)):
        correct_step = 0
        wrong_step = 0
        removed_step = step_list_complete[i]
        # None when the removed step never occurred in the training set (it cannot be predicted)
        removed_step_id = test_step_identifier_dict.get(tuple(removed_step))

        # Identifiers of the remaining steps; steps unseen in the training set are ignored
        X = {
            test_step_identifier_dict[tuple(step)]
            for j, step in enumerate(step_list_complete)
            if j != i and tuple(step) in test_step_identifier_dict
        }

        # If set of remaining test steps in the new test case appeared in existing test cases.
        # When X itself is not frequent, none of its supersets can be frequent either.
        support_X = support_by_itemset.get(frozenset(X)) if X else None
        if support_X is not None:
            # Candidate itemsets are X plus exactly one extra step Y
            for itemset in itemsets_by_size.get(len(X) + 1, []):
                if not X <= itemset:
                    continue
                Y = set(itemset) - X
                rule = str(X) + '->' + str(Y)

                # Compute confidence of the rule: support(X u Y) / support(X)
                support_XY = support_by_itemset[itemset]
                confidence_XY = support_XY / support_X

                # Compute lift of the rule: confidence / support(Y).
                # Y is a subset of a frequent itemset, so it is frequent as well.
                support_Y = support_by_itemset[frozenset(Y)]
                lift_XY = confidence_XY / support_Y

                # Select rules with confidence >= min_confidence and lift above 1
                if (confidence_XY >= min_confidence) and (lift_XY > 1):
                    # Rule is valid, i.e., meet criteria
                    rule_list.append(rule)
                    if Y == {removed_step_id}:
                        correct_step += 1
                        correct_case += 1
                    else:
                        wrong_step += 1
                        wrong_case += 1

        # Step-level accuracy is only defined when at least one rule fired for this step
        rules_fired_step = correct_step + wrong_step
        if rules_fired_step:
            accuracy_step = correct_step / rules_fired_step
            accuracy_step_list.append(accuracy_step)

    # Compute accuracy and save metrics; test cases for which no rule fired are not recorded
    rules_fired_case = correct_case + wrong_case
    if rules_fired_case:
        accuracy_case = correct_case / rules_fired_case
        test_case_accuracy_dict[testing_set_key] = [len(rule_list), rule_list, accuracy_case, accuracy_step_list]
        # Rewritten after every recorded test case so partial results survive an interrupted run
        with open(results_path, 'w') as file:
            file.write(json.dumps(test_case_accuracy_dict)) # use `json.loads` to do the reverse

In [None]:
# Iterate through test cases to build and evaluate association rules
# Start with the first 2,500 test cases to build rules and evaluate on the 2,501st,
# then add the 2,501st to the training set and repeat with the next test case
# Minimum confidence = 0.5 and lift above 1

min_confidence = 0.5
results_path = 'missing_step_evaluation_confidence_05.txt'

test_case_accuracy_dict = {}
te = TransactionEncoder()

# Clamp the upper bound so that a shorter key list cannot cause an IndexError
last_test_key_index = min(3322, len(all_test_case_keys))

for test_key_index in tqdm(range(2500, last_test_key_index)):
    rule_list = []
    correct_case = 0
    wrong_case = 0
    accuracy_step_list = []

    # Train on every key BEFORE test_key_index and evaluate on the key AT test_key_index.
    # (Using test_key_index + 1 skipped one test case: it was added to the training set
    # without ever being evaluated, and the last iteration could run past the list end.)
    training_set_keys = all_test_case_keys[:test_key_index]
    testing_set_key = all_test_case_keys[test_key_index]

    existing_test_cases_training_df = existing_test_cases_df[existing_test_cases_df['Key'].isin(training_set_keys)]

    # Build association rules with training set.
    # A single pass assigns each distinct step (tuple of tokens) an integer identifier in
    # order of first appearance and groups the identifiers per test case (one transaction each).
    test_step_identifier_dict = {}
    test_case_transactions_dict = {}
    for step_tokens, test_case_key in zip(existing_test_cases_training_df['Step'],
                                          existing_test_cases_training_df['Key']):
        step = tuple(step_tokens)
        step_identifier = test_step_identifier_dict.setdefault(step, len(test_step_identifier_dict))
        test_case_transactions_dict.setdefault(test_case_key, []).append(step_identifier)

    transactions_list = list(test_case_transactions_dict.values())

    te_ary = te.fit(transactions_list).transform(transactions_list)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # Get frequent sets of test steps (min support = 0.005)
    frequent_itemsets = fpgrowth(df, min_support=0.005, use_colnames=True)

    # Index the frequent itemsets once per training set: support by itemset for direct
    # lookups, and itemsets grouped by size so only candidates of the right size are scanned.
    support_by_itemset = {}
    itemsets_by_size = {}
    for itemset, support in zip(frequent_itemsets['itemsets'], frequent_itemsets['support']):
        support_by_itemset[itemset] = support
        itemsets_by_size.setdefault(len(itemset), []).append(itemset)

    step_list_complete = existing_test_cases_df[existing_test_cases_df['Key']==testing_set_key]['Step'].tolist()

    # Leave-one-out over the steps of the test case: hide step i and check whether the
    # rules built from the remaining steps recommend it back.
    for i in range(len(step_list_complete)):
        correct_step = 0
        wrong_step = 0
        removed_step = step_list_complete[i]
        # None when the removed step never occurred in the training set (it cannot be predicted)
        removed_step_id = test_step_identifier_dict.get(tuple(removed_step))

        # Identifiers of the remaining steps; steps unseen in the training set are ignored
        X = {
            test_step_identifier_dict[tuple(step)]
            for j, step in enumerate(step_list_complete)
            if j != i and tuple(step) in test_step_identifier_dict
        }

        # If set of remaining test steps in the new test case appeared in existing test cases.
        # When X itself is not frequent, none of its supersets can be frequent either.
        support_X = support_by_itemset.get(frozenset(X)) if X else None
        if support_X is not None:
            # Candidate itemsets are X plus exactly one extra step Y
            for itemset in itemsets_by_size.get(len(X) + 1, []):
                if not X <= itemset:
                    continue
                Y = set(itemset) - X
                rule = str(X) + '->' + str(Y)

                # Compute confidence of the rule: support(X u Y) / support(X)
                support_XY = support_by_itemset[itemset]
                confidence_XY = support_XY / support_X

                # Compute lift of the rule: confidence / support(Y).
                # Y is a subset of a frequent itemset, so it is frequent as well.
                support_Y = support_by_itemset[frozenset(Y)]
                lift_XY = confidence_XY / support_Y

                # Select rules with min confidence of 0.5 and lift above 1
                if (confidence_XY >= min_confidence) and (lift_XY > 1):
                    # Rule is valid, i.e., meet criteria
                    rule_list.append(rule)
                    if Y == {removed_step_id}:
                        correct_step += 1
                        correct_case += 1
                    else:
                        wrong_step += 1
                        wrong_case += 1

        # Step-level accuracy is only defined when at least one rule fired for this step
        rules_fired_step = correct_step + wrong_step
        if rules_fired_step:
            accuracy_step = correct_step / rules_fired_step
            accuracy_step_list.append(accuracy_step)

    # Compute accuracy and save metrics; test cases for which no rule fired are not recorded
    rules_fired_case = correct_case + wrong_case
    if rules_fired_case:
        accuracy_case = correct_case / rules_fired_case
        test_case_accuracy_dict[testing_set_key] = [len(rule_list), rule_list, accuracy_case, accuracy_step_list]
        # Rewritten after every recorded test case so partial results survive an interrupted run
        with open(results_path, 'w') as file:
            file.write(json.dumps(test_case_accuracy_dict)) # use `json.loads` to do the reverse

In [115]:
# Check results
# Summary of the evaluation saved in 'missing_step_evaluation.txt'
with open('missing_step_evaluation.txt') as f:
    min_confidence_1 = json.load(f)

# Entries are read positionally: position 0 feeds the rule counts and
# position 2 feeds the accuracies reported below.
number_rules_list = [entry[0] for entry in min_confidence_1.values()]
accuracy_list = [entry[2] for entry in min_confidence_1.values()]

# (heading, aggregate function, values) -- each aggregate is computed right before it is printed
summary_rows = (
    ("Total rules: ", sum, number_rules_list),
    ("Median rules per test case: ", st.median, number_rules_list),
    ("Average rules per test case: ", st.mean, number_rules_list),
    ("Median accuracy per test case: ", st.median, accuracy_list),
    ("Average accuracy per test case: ", st.mean, accuracy_list),
)
for heading, aggregate, figures in summary_rows:
    print(heading)
    print(aggregate(figures))

In [117]:
# Check results
# Summary of the evaluation saved in 'missing_step_evaluation_confidence_05.txt'
with open('missing_step_evaluation_confidence_05.txt') as f:
    min_confidence_05 = json.load(f)

# Entries are read positionally: position 0 feeds the rule counts and
# position 2 feeds the accuracies reported below.
number_rules_list = [entry[0] for entry in min_confidence_05.values()]
accuracy_list = [entry[2] for entry in min_confidence_05.values()]

# (heading, aggregate function, values) -- each aggregate is computed right before it is printed
summary_rows = (
    ("Total rules: ", sum, number_rules_list),
    ("Median rules per test case: ", st.median, number_rules_list),
    ("Average rules per test case: ", st.mean, number_rules_list),
    ("Median accuracy per test case: ", st.median, accuracy_list),
    ("Average accuracy per test case: ", st.mean, accuracy_list),
)
for heading, aggregate, figures in summary_rows:
    print(heading)
    print(aggregate(figures))