# Periodic frequent-pattern discovery (with time measurement)
Using algorithm from this paper: https://www.researchgate.net/publication/356825094_Discovering_Periodic-Frequent_Patterns_in_Uncertain_Temporal_Databases?enrichId=rgreq-1de5ac4c085dd4a641f85dda9c527a37-XXX&enrichSource=Y292ZXJQYWdlOzM1NjgyNTA5NDtBUzoxMTQzMTI4MTA4MzQ2MzY1M0AxNjYyNTk3MzM3NTUz&el=1_x_3&_esc=publicationCoverPdf

The algorithm is the same as in the original pfpattern.ipynb, but separated out for custom time measurements (and to protect the original file from accidental changes).

This script assumes that preprocessed datasets, such as T40I10D100K, are available.

In [None]:
import numpy as np
from tqdm import tqdm
import pandas as pd
import glob
import os
import matplotlib
import sortednp
import datetime
import json
from operator import itemgetter
import timeit
import time
import numba

# Algorithm

## Find frequent items (1-pattern)

Both sum of prob and max of time difference (period)

In [None]:
# Column names to exclude; currently only the timestamp column.
# NOTE(review): nothing in the visible code reads this list (the functions below
# hard-code 'Time' instead) — confirm whether it is still needed.
exclude_columns = ['Time']

In [None]:
def prepare_database(database_df,
                    min_support, max_period):
    """Find the periodic-frequent single items and trim the database to them.

    For every column except 'Time' the expected support is the column sum and
    the period is the largest gap between consecutive timestamps at which the
    column is positive, also counting the gap from the first database
    timestamp and the gap to the last one.

    Args:
        database_df: DataFrame with a 'Time' column plus one column per item.
        min_support: smallest expected support an item must reach.
        max_period: largest period an item may have.

    Returns:
        (mining_df, items, pf_one_df, min_time, max_time) where ``mining_df``
        holds 'Time' followed by the kept items, ``items`` lists the kept item
        names ordered by descending (ExpSup, MaxPeriod), ``pf_one_df`` is the
        Item/ExpSup/MaxPeriod table of those items, and ``min_time`` /
        ``max_time`` are the extreme values of 'Time'.
    """
    time_values = database_df['Time']
    min_time, max_time = time_values.min(), time_values.max()
    item_columns = [col for col in database_df.columns if col != 'Time']

    # The sum also covers 'Time'; that entry is simply never read.
    expsup_one_df = database_df.sum(axis=0)
    period_one_df = expsup_one_df.copy(deep=True)
    for col in item_columns:
        occurrences = time_values[database_df[col] > 0]
        period_one_df[col] = max(occurrences.min() - min_time,
                                 occurrences.diff().max(),
                                 max_time - occurrences.max())

    pf_one_items = []
    for col in item_columns:
        support, period = expsup_one_df[col], period_one_df[col]
        if (support >= min_support) and (max_period >= period):
            pf_one_items.append([col, support, period])

    pf_one_df = pd.DataFrame(pf_one_items, columns=['Item', 'ExpSup', 'MaxPeriod'])
    pf_one_df.sort_values(by=['ExpSup', 'MaxPeriod'], ignore_index=True,
                          ascending=False, inplace=True)

    items = pf_one_df['Item'].unique().tolist()
    mining_df = database_df[['Time'] + items]
    return mining_df, items, pf_one_df, min_time, max_time

## Define tree

Some of the code is taken from: https://github.com/Likhitha-palla/UPFP

In [None]:
class Node(object):
    """One node of the pattern tree.

    Attributes are the item carried by the node, the accumulated probability
    (expSupCap of the path/pattern), the ``children`` mapping from item to
    child node, the ``parent`` node, and ``times``, an initially empty array
    of timestamps.
    """

    def __init__(self, item, children, parent, probability=0):
        self.parent = parent
        self.children = children
        self.item = item
        self.probability = probability              # expSupCap of the path/pattern
        self.times = np.empty(shape=0)

    def addChild(self, node):
        """Register ``node`` under its item (replacing any previous entry) and adopt it."""
        self.children[node.item] = node
        node.parent = self

In [None]:
class Tree(object):
    """Pattern tree: a root node plus, per item, the list of nodes carrying it."""

    def __init__(self, items):
        self.items = items
        self.root = Node(-1, {}, None)      # the root uses -1 as its item
        self.nodecounts = 0
        # item : [ nodes of this item ]
        self.nodelists = {entry: [] for entry in items}
        

## Build tree

In [None]:
def build_tree(matrix, time_col, items, algo_mode: int = 1):
    """Build the pattern tree from a probability matrix.

    Every row of ``matrix`` is a transaction; its positive entries, taken in
    column order, form a path from the root. Each node on the path gains the
    entry's probability scaled by a running cap of the earlier probabilities
    in the same row. The row's timestamp is appended to the last node of the
    path.

    Args:
        matrix: 2-D array of probabilities, one column per entry of ``items``.
        time_col: timestamp of each row, indexed like the rows of ``matrix``.
        items: item identifier for each column of ``matrix``.
        algo_mode: 1 keeps the minimum of the probabilities seen so far in the
            row as the cap, 2 keeps the maximum. Any other value leaves the
            cap at 1.

    Returns:
        The populated Tree. A matrix without any positive entry yields a tree
        that only has its root.
    """
    tree = Tree(items)
    print("Algo mode = ", algo_mode, " (1 = min, 2 = max)")

    # np.where returns the positive cells in row-major order, so all cells of
    # one row are adjacent and ordered by column.
    x_indexes, y_indexes = np.where(matrix > 0)
    entry_count = x_indexes.shape[0]

    node = tree.root
    expCap = 1
    for i in range(entry_count):
        x_idx = x_indexes[i]        # row
        y_idx = y_indexes[i]        # col

        item = items[y_idx]
        prob = matrix[x_idx][y_idx]

        if item in node.children:
            node = node.children[item]
        else:
            new_node = Node(item, {}, node)
            tree.nodecounts += 1
            node.addChild(new_node)
            node = new_node
            tree.nodelists[item].append(node)
        node.probability += prob * expCap

        if algo_mode == 1:
            expCap = min(expCap, prob)
        elif algo_mode == 2:
            expCap = max(expCap, prob)

        # The row ends when there is no next cell or the next cell is in another row.
        row_finished = (i + 1 >= entry_count) or (x_indexes[i + 1] != x_idx)
        if row_finished:
            node.times = np.append(node.times, time_col[x_idx])
            node = tree.root            # reset back to root
            expCap = 1

    return tree

### Remove item from tree entirely

In [None]:
def remove_item(tree: Tree, item):
    """Detach every node of ``item`` and hand its timestamps to its parent.

    The parent's entry for ``item`` is set to None rather than deleted, and
    ``tree.nodelists[item]`` itself is left untouched.
    """
    for leaf in tree.nodelists[item]:
        owner = leaf.parent
        owner.times = np.concatenate([owner.times, leaf.times])
        owner.children[item] = None

## UPFP-growth

The list of patterns shall look like this: [ [ items ], support, period ]

The string produced for report for each pattern: {items} [support, period]

For example: "AQI_O3_MED, Motorbike_MED" [245, 154]

In [None]:
def traverse_path(node: Node):
    """Return the items of ``node``'s ancestors, ordered from just below the root down.

    The node's own item is not included, and the root (item -1) ends the walk.
    """
    ancestors = []
    current = node.parent
    while current.item != -1:
        ancestors.append(current.item)
        current = current.parent
    return ancestors[::-1]

In [None]:
@numba.jit(nopython=True)
def find_period(times: np.ndarray, min_time: float, max_time: float):
    """Return the largest gap in a set of timestamps.

    The gaps considered are: ``min_time`` to the earliest timestamp, the
    latest timestamp to ``max_time``, and every pair of consecutive
    timestamps after sorting. An empty input returns the sentinel 9999999.
    """
    if (times is None) or (len(times) == 0): return 9999999
    # Sort first: the input is not required to be ordered.
    ts = np.sort(times)
    period = max( min(ts) - min_time, max_time - max(ts) )
    if (len(ts) > 1):
        period = max(period, np.max(np.diff(ts)))
    # Loop form of the np.diff expression above, kept for reference:
    # for i in range(len(ts)-1):
    #     difference = ts[i+1] - ts[i]
    #     period = max(period, difference)
    return period

### Helper funcs

In [None]:
def find_frequent_items(cond_patterns, cond_ts, cond_sups, min_support, max_period, min_time, max_time):
    """Aggregate support and period per item over a conditional pattern base.

    Each item collects the support of every pattern it occurs in, together
    with that pattern's timestamps; the collected timestamps are then turned
    into a period with ``find_period``.

    Returns:
        dict mapping item -> [support, period] for the items with
        support >= min_support and period <= max_period, ordered by
        ascending (support, period).
    """
    stats = {}
    for idx, pattern in enumerate(cond_patterns):
        for member in pattern:
            if member not in stats:
                stats[member] = [0, np.ndarray(shape=(0))]      # support/prob, timestamps
            entry = stats[member]
            entry[0] += cond_sups[idx]
            entry[1] = np.concatenate([entry[1], cond_ts[idx]])

    # Replace each item's timestamp array by its period.
    for entry in stats.values():
        entry[1] = find_period(entry[1], min_time, max_time)

    kept = [(key, value) for key, value in stats.items()
            if (value[0] >= min_support) and (value[1] <= max_period)]
    kept.sort(key=lambda pair: (pair[1][0], pair[1][1]))
    return dict(kept)

In [None]:
def make_condition_pattern_base(tree: Tree, item, min_support, max_period, min_time, max_time):
    """Build the conditional pattern base of ``item``.

    For every node of ``item`` the ancestor path, the node's timestamps and
    the node's probability are collected. Items that are not frequent and
    periodic within this base are dropped from the paths, and paths that
    become empty are discarded.

    Returns:
        (freq_item_sup_per, new_patterns, new_times, new_sups) where
        ``freq_item_sup_per`` maps each kept item to [support, period] and the
        other three are parallel sequences: ``new_patterns[k]`` is a path
        sorted by descending (support, period), ``new_times[k]`` its
        timestamps and ``new_sups[k]`` its support.
    """
    nodes = tree.nodelists[item]
    cond_patterns = [traverse_path(node) for node in nodes]
    cond_times = [node.times for node in nodes]
    cond_sups = np.array([node.probability for node in nodes], dtype=float)

    freq_item_sup_per = find_frequent_items(cond_patterns, cond_times, cond_sups,
                                            min_support, max_period, min_time, max_time)

    def rank(entry):
        return (freq_item_sup_per[entry][0], freq_item_sup_per[entry][1])

    # The three outputs are appended together so that index k always refers to
    # the same surviving path; discarded paths leave no slot behind.
    new_patterns, new_times, kept_sups = [], [], []
    for pattern, times, sup in zip(cond_patterns, cond_times, cond_sups):
        kept = [entry for entry in pattern if entry in freq_item_sup_per]
        if not kept:
            continue
        kept.sort(key=rank, reverse=True)
        new_patterns.append(kept)
        new_times.append(times)
        kept_sups.append(sup)

    new_sups = np.array(kept_sups, dtype=float)
    return freq_item_sup_per, new_patterns, new_times, new_sups

In [None]:
def add_transaction_condition(tree: Tree, transaction, times, sup):
    """Insert one conditional transaction into ``tree``.

    Walks (and creates where missing) the path spelled by ``transaction``,
    adds ``sup`` to the probability of every node on it, and appends ``times``
    to the node where the walk ends (the root for an empty transaction).
    """
    current = tree.root
    for item in transaction:
        if item not in current.children:
            child = Node(item, {}, current)
            current.addChild(child)
            tree.nodelists[item].append(child)
        current = current.children[item]
        current.probability += sup
    current.times = np.concatenate([current.times, times])

### Main algorithm

In [None]:
# Return the list of all patterns satisfying the constraints.
def upfp_growth(tree: Tree, prefix,
                min_support, max_period,
                min_time, max_time):
    """Recursively mine candidate patterns from ``tree``.

    Items are handled from the last entry of ``tree.items`` to the first.
    When an item's summed node probability reaches ``min_support``, its
    conditional pattern base is built: a non-empty base is mined recursively
    with the item appended to the prefix, an empty base emits the extended
    prefix itself as an int16 array. Every handled item is then removed from
    the tree, so ``tree`` is consumed by the call.

    Returns:
        list of int16 arrays, one per emitted pattern.
    """
    mined_patterns = []

    for item in reversed(tree.items):
        expsup = sum(node.probability for node in tree.nodelists[item])
        if expsup >= min_support:
            newprefix = prefix + [item]
            base = make_condition_pattern_base(tree, item,
                                               min_support, max_period,
                                               min_time, max_time)
            freq_item_sup_per, cond_patterns, cond_times, cond_sups = base
            if cond_patterns:
                cond_tree = Tree(list(freq_item_sup_per.keys()))
                for p, pattern in enumerate(cond_patterns):
                    add_transaction_condition(cond_tree, pattern, cond_times[p], cond_sups[p])
                mined_patterns.extend(
                    upfp_growth(cond_tree, newprefix, min_support, max_period, min_time, max_time))
            else:
                # if no more items to search: stop and return.
                mined_patterns.append(np.array(newprefix, dtype=np.int16))
        remove_item(tree, item)
    return mined_patterns

## Recalculate actual Expected support for one more time

In [None]:
def counting_sort(array, count_func):
    """Bucket the elements of ``array`` by ``count_func(element)``.

    Returns:
        (sorted_array, count_list_heads). ``sorted_array`` holds the elements
        ordered by ascending count; elements sharing a count come out in the
        reverse of their input order. ``count_list_heads[c]`` is the index in
        ``sorted_array`` where count ``c`` starts, and the final entry equals
        ``len(array)`` so that ``heads[c] : heads[c + 1]`` works for the
        largest count too.
    """
    keys = [count_func(element) for element in array]
    max_count = max([0] + keys)

    # Usable counts are 0..max_count inclusive, plus one trailing end marker.
    count_list_heads = [0] * (max_count + 2)
    for key in keys:
        count_list_heads[key] += 1

    # Running totals: each slot becomes the end index of its bucket.
    for slot in range(1, max_count + 2):
        count_list_heads[slot] += count_list_heads[slot - 1]

    # Fill every bucket from its end backwards; the ends turn into starts.
    sorted_array = [None] * len(array)
    for element, key in zip(array, keys):
        count_list_heads[key] -= 1
        sorted_array[count_list_heads[key]] = element

    return sorted_array, count_list_heads


In [None]:
@numba.jit(nopython=True)
def pattern_priority(ptn):
    """Sort key of a pattern: its smallest element."""
    return ptn.min()

In [None]:
@numba.jit(nopython=True)
def find_expSup(pattern_matrix: np.ndarray):
    """Return the per-row product of ``pattern_matrix`` and the sum of those products."""
    row_count = pattern_matrix.shape[0]
    probabilities = np.zeros(row_count)
    for row in range(row_count):
        probabilities[row] = np.prod(pattern_matrix[row, :])
    total = np.sum(probabilities)
    return probabilities, total

In [None]:
@numba.jit(nopython=True)
def find_final_period(times: np.ndarray, 
                    probabilities: np.ndarray, 
                    min_time: float, max_time: float):
    """Return the largest gap among the timestamps whose probability is positive.

    The gaps considered are ``min_time`` to the earliest kept timestamp, the
    latest kept timestamp to ``max_time``, and every pair of adjacent kept
    timestamps. When no timestamp is kept the sentinel 999999 is returned.
    """
    period = 0
    times = times.copy()
    # Keep only the transactions in which the pattern can occur.
    times = times[ probabilities > 0 ]
    # times = times.to('cuda:0')
    if (times is None) or (times.shape[0] == 0): 
        period = 999999
    else:
        period = max( np.min(times) - min_time, max_time - np.max(times) )
    # No sort happens here, so adjacent differences are taken in input order.
    # NOTE(review): assumes `times` arrives in ascending order — confirm against the caller.
    if (len(times) > 1):
        period = max( period, np.max(np.diff(times)) )
    return period

In [None]:
def filter_patterns(mined_patterns, mining_df, min_time, max_time, min_support, max_period):
    """Keep the longest candidate patterns that really satisfy both constraints.

    Candidates are bucketed by length. A coarse pass probes lengths 1, 2, 4,
    ... with a sampled scan of each bucket, then a binary search over the
    remaining lengths scans whole buckets. Both passes look for the greatest
    length that has at least one pattern with exact expected support
    >= min_support and period <= max_period. All candidates of that length,
    from the recorded start index on, are then evaluated.

    Args:
        mined_patterns: sequence of integer arrays; each holds column indexes
            into ``mining_df`` with its 'Time' column dropped.
        mining_df: DataFrame with a 'Time' column and one column per item.
        min_time, max_time: bounds passed on to the period computation.
        min_support, max_period: the constraints to satisfy.

    Returns:
        (filtered_patterns, max_pattern_length, potential_max_length).
        ``filtered_patterns`` is a list of [pattern, expSup, period] sorted by
        descending (length, -period, expSup). ``potential_max_length`` is the
        longest candidate length. When no candidate qualifies the result is
        ``([], 0, potential_max_length)``.
    """
    sorted_patterns, length_heads = counting_sort(mined_patterns, lambda ptn: len(ptn))
    potential_max_length = len(length_heads) - 2
    for length in range(1, potential_max_length):
        bucket_start, bucket_end = length_heads[length], length_heads[length + 1]
        sorted_patterns[bucket_start:bucket_end] = sorted(
            sorted_patterns[bucket_start:bucket_end],
            key=lambda p: pattern_priority(p))

    print('Potential max pattern length = ', potential_max_length)

    tsr = mining_df.drop(columns=['Time'], axis=1).to_numpy()
    time_tsr = mining_df['Time'].to_numpy()

    def evaluate(ptn):
        # Exact expected support and period of one candidate.
        probabilities, expSup = find_expSup(tsr[:, ptn])
        period = find_final_period(time_tsr, probabilities, min_time, max_time)
        return expSup, period

    def qualifies(ptn):
        # True when the candidate meets both constraints.
        expSup, period = evaluate(ptn)
        return (expSup >= min_support) and (period <= max_period)

    min_length = 1
    max_length = potential_max_length
    max_pattern_length = 0          # stays 0 when nothing qualifies
    max_pattern_start_idx = -1

    # quick search to reduce the space needed to scan.
    length = 1
    while length <= max_length:
        bucket_start, bucket_end = length_heads[length], length_heads[length + 1]
        bucket_size = bucket_end - bucket_start
        pattern_idx = bucket_start
        found = False
        while pattern_idx < bucket_end:
            if qualifies(sorted_patterns[pattern_idx]):
                found = True
                break
            if bucket_size == 1:
                break
            # Sampled scan: jump by log2 of the bucket size.
            pattern_idx += int(np.log2(bucket_size))

        if found:
            print("Found the pattern with length = ", length)
            max_pattern_length = length
            # The scan was sampled, so the final pass restarts at the bucket start.
            max_pattern_start_idx = bucket_start
            min_length = length + 1
        length *= 2

    # this search finds the greatest qualifying length; a further pass is
    # needed to collect all patterns of that length.
    while min_length <= max_length:
        length = (min_length + max_length) // 2
        print("Checking potential patterns with length = ", length)

        start_pattern_idx = -1
        for pattern_idx in tqdm(range(length_heads[length], length_heads[length + 1]), desc="pattern"):
            if qualifies(sorted_patterns[pattern_idx]):
                start_pattern_idx = pattern_idx
                break

        if start_pattern_idx > -1:
            print("Found the pattern with length = ", length)
            max_pattern_length = length
            max_pattern_start_idx = start_pattern_idx
            min_length = length + 1
        else:
            max_length = length - 1

    if max_pattern_start_idx < 0:
        # No candidate met the constraints (or there were no candidates).
        return [], 0, potential_max_length

    print("Now calculating all patterns with max length...")
    filtered_patterns = []
    for pattern_idx in tqdm(range(max_pattern_start_idx, length_heads[max_pattern_length + 1]), desc="pattern"):
        ptn = sorted_patterns[pattern_idx]
        expSup, period = evaluate(ptn)
        if (expSup >= min_support) and (period <= max_period):
            filtered_patterns.append([ptn, expSup, period])

    filtered_patterns.sort(key=lambda p: (len(p[0]), -p[2], p[1]), reverse=True)
    return filtered_patterns, max_pattern_length, potential_max_length

In [None]:
def make_strings_from_patterns(patterns, item_list):
    """Render patterns as report lines of the form ``"a, b:[support, period]\\n"``.

    Args:
        patterns: sequence of [item_indexes, support, period] entries.
        item_list: item names, indexed by the values in ``item_indexes``.

    Returns:
        list of strings, one per pattern, each ending with a newline. A
        pattern without items yields a line that starts directly with ':['.
    """
    pattern_strings = []
    for ptn in patterns:
        names = ', '.join(item_list[idx] for idx in ptn[0])
        pattern_strings.append(names + ':[' + str(ptn[1]) + ', ' + str(ptn[2]) + ']\n')
    return pattern_strings

In [None]:
def produce_patterns(mined_patterns, mining_df, item_list, min_time, max_time, min_support, max_period):
    """Filter the mined candidates and render the survivors as report strings.

    Returns:
        (pattern_strings, final_patterns, max_pattern_length,
        potential_max_length); the last three come from ``filter_patterns``.
    """
    filtered = filter_patterns(mined_patterns, mining_df,
                               min_time, max_time, min_support, max_period)
    final_patterns, max_pattern_length, potential_max_length = filtered

    pattern_strings = make_strings_from_patterns(final_patterns, item_list)
    if pattern_strings:
        print('Most periodic pattern = ', pattern_strings[0])
    return pattern_strings, final_patterns, max_pattern_length, potential_max_length

## Export results

In [None]:
def export_results(output_folder_path: str,
                pattern_strings, pf_one_df, database_path, min_support, max_period):
    """Write one run's results into ``output_folder_path`` (created if missing).

    Three files are produced: 'fp_pattern_one.csv' (``pf_one_df``),
    'patterns.txt' (the pattern strings, written as given) and 'setting.json'
    (data path, min_support and max_period).
    """
    os.makedirs(output_folder_path, exist_ok=True)

    def target(file_name):
        return os.path.join(output_folder_path, file_name)

    pf_one_df.to_csv(target('fp_pattern_one.csv'))

    with open(target('patterns.txt'), 'w') as patterns_file:
        patterns_file.writelines(pattern_strings)

    run_settings = {'data_path': database_path, 'min_support': min_support, 'max_period': max_period}
    with open(target('setting.json'), 'w') as settings_file:
        settings_file.write(json.dumps(run_settings, indent=4))

# Actual run with time measurement

Since this algorithm is deterministic (no random operations), running it many times should yield the same result. Hence, we export the output only once, but run the algorithm several times to obtain an average runtime.

# Define functions to measure time and save results

In [None]:
# Column layout of the timing tables. Each row appended by measure_time is the
# list returned by mine_patterns, whose entries follow this order: the run
# settings, the number of periodic-frequent items, five stage durations, and
# four size/length statistics.
timetable_columns = [ 'Mode', 'MinSup', 'MaxPer', 
                'PFItemCount', 
                'prepare_database', 'make_transaction', 'build_tree', 'upfp_growth', 'produce_pattern', 
                'max_pattern_length', 'potential_max_length', 'potential_pattern_count',
                'size_tree' ]

In [None]:
def mine_patterns(database_df, output_folder_path,
                min_support, max_period, algo_mode: int = 1, export: bool = True):
    """Run the whole mining pipeline once and time each stage.

    Args:
        database_df: DataFrame with a 'Time' column plus one column per item.
        output_folder_path: accepted but not used by this function.
        min_support, max_period: mining constraints.
        algo_mode: forwarded to ``build_tree``.
        export: when True the mined candidates are filtered and rendered;
            otherwise that stage is skipped and the pattern strings are "".

    Returns:
        (pattern_strings, pf_one_df, row). ``row`` is the list
        [algo_mode, min_support, max_period, item count, five durations,
        max_pattern_length, potential_max_length, candidate count, tree node
        count]. The second duration slot is never written and stays 0. When
        no item passes the single-item filter the result is
        ``([], pf_one_df, None)``.
    """
    clock = timeit.default_timer
    durations = [0] * 5

    started = clock()
    mining_df, item_list, pf_one_df, min_time, max_time = prepare_database(database_df, min_support, max_period)
    durations[0] = clock() - started

    if not item_list:
        return [], pf_one_df, None

    started = clock()
    time_col = mining_df['Time'].copy().to_numpy()         # to ensure no editing shall affect the main data
    npmatrix = mining_df.drop(columns=['Time'], axis=1).to_numpy()
    # The tree works on column indexes; item names are looked up again at export.
    tree = build_tree(npmatrix, time_col, list(range(len(item_list))), algo_mode)
    durations[2] = clock() - started
    size_tree = tree.nodecounts
    print("Tree node count = ", size_tree)

    started = clock()
    mined_patterns = upfp_growth(tree, [], min_support, max_period, min_time, max_time)
    durations[3] = clock() - started
    potential_pattern_counts = len(mined_patterns)
    print("Algo time = ", durations[3])

    max_pattern_length, potential_max_length = -1, -1
    started = clock()
    if export:
        pattern_strings, _, max_pattern_length, potential_max_length = produce_patterns(
            mined_patterns, mining_df, item_list,
            min_time, max_time, min_support, max_period)
    else:
        pattern_strings = ""
    durations[4] = clock() - started

    row = [algo_mode, min_support, max_period, len(item_list)]
    row += durations
    row += [max_pattern_length, potential_max_length, potential_pattern_counts, size_tree]

    return pattern_strings, pf_one_df, row

In [None]:
def measure_time(database_df:pd.DataFrame, database_path:str,
                output_path: str, fulltimetable_path:str,
                min_supports, max_periods,
                iteration_count: int = 5,
                algo_mode: int = 1,
                export: bool = True):
    """Time ``mine_patterns`` over every (min support, max period) combination.

    Each combination is run ``iteration_count`` times. Every run that yields
    timings appends one row to this call's table (saved to
    '<output_path>/times/times.csv') and to the cumulative table read from and
    written back to ``fulltimetable_path``; both files are rewritten after
    each run. The results of the last run of a combination are exported to
    '<output_path>/s<min support>_p<max period>'. The sweep settings are
    finally stored in '<output_path>/times/setting.json'.

    Returns:
        (timetable, fulltimetable) as DataFrames.
    """
    time_folder_path = os.path.join(output_path, 'times/')
    os.makedirs(time_folder_path, exist_ok=True)
    local_times_csv = time_folder_path + 'times.csv'

    timetable = pd.DataFrame(columns=timetable_columns)
    fulltimetable = pd.read_csv(fulltimetable_path)

    pattern_strings = []
    for min_sp in min_supports:
        for max_per in max_periods:
            print('\n=====\nMin support, max period = ', (min_sp, max_per))
            outpath = os.path.join(output_path, f"s{min_sp!s}_p{max_per!s}")
            for run in range(iteration_count):
                pattern_strings, pf_one_df, times = mine_patterns(database_df, outpath,
                                        min_sp, max_per, algo_mode, export=export)
                if times is None:
                    continue
                # Append the row to both tables and persist each right away.
                for table, csv_path in ((timetable, local_times_csv),
                                        (fulltimetable, fulltimetable_path)):
                    table.loc[table.shape[0]] = times
                    table.to_csv(csv_path, index=False)
                # times[7] is the 'upfp_growth' duration.
                print(f"...Algo time of iteration {run!s} = {times[7]!s}")

            export_results(outpath, pattern_strings, pf_one_df, database_path, min_sp, max_per)

    sweep_settings = {'input_path': database_path,
                      'min_supports': min_supports,
                      'max_periods': max_periods,
                      'algo_mode': algo_mode}
    with open(os.path.join(time_folder_path, "setting.json"), 'w') as f:
        f.write(json.dumps(sweep_settings, indent=4))
    return timetable, fulltimetable


# Input

In [None]:
# Dataset name -> path of its CSV file (relative paths).
datasets = {
    "UrbanAir": r"../data/UTDATABASE/utd_20221222_0226/label_timeconverted.csv",
    "T10I4D200K": r"../data/others/T10I4D200K/T10I4D200K.csv",
}

In [None]:
# Dataset used by this run; must be a key of `datasets`.
database_name = "UrbanAir"
database_path = datasets[database_name]

In [None]:
# Load the database and replace missing cells by 0, which the mining code
# treats as "item absent" (it only looks at values > 0).
database_df = pd.read_csv(database_path)
database_df.fillna(0, inplace=True)

# Test

We'll test with different support and period constraints.

In [None]:
# Sweep settings: supports 2^10 .. 2^1 (descending) and periods 2^10, 2^11.
iteration_count = 1
algo_mode = 2       # 1 = min, 2 = max

base = 2
min_supports = [pow(base, exponent) for exponent in range(10, 0, -1)]
max_periods = [pow(base, exponent) for exponent in range(10, 12)]

print(min_supports)
print(max_periods)
print(iteration_count)
print(algo_mode)

In [None]:
# Output folder of this run, suffixed with the current month/day and hour/minute.
output_folder_path = f'../output_temp/{database_name}/TIME_V2/ufp_' + datetime.datetime.now().strftime(format="%m%d_%H%M")
os.makedirs(output_folder_path, exist_ok=True)

# Cumulative timing table of this dataset. On first use it is created with
# only the header row, so that measure_time can read it and append to it.
fulltimetable_path = f"../output_temp/{database_name}/TIME_V2/times.csv"
if not (os.path.exists(fulltimetable_path)):
    timetable = pd.DataFrame( columns= timetable_columns )
    timetable.to_csv(fulltimetable_path, index=False)

In [None]:
# Warm-up run, intended to initialise numba before the timed runs.
# NOTE(review): mine_patterns returns early when no item passes these
# constraints, in which case no jitted function would be called — confirm
# that (4096, 128) keeps at least one item on this dataset.
mine_patterns(database_df, "", 4096, 128, algo_mode, export=True)

In [None]:
# Timed sweep over all configured supports and periods. The trailing False is
# the `export` argument, so the pattern filtering/rendering stage is skipped.
timetable, fulltimetable = measure_time(database_df, database_path,
                            output_folder_path, fulltimetable_path,
                            min_supports, max_periods, iteration_count, algo_mode, False)

In [None]:
# timetable, fulltimetable = measure_time(database_df, database_path,
#                             output_folder_path, fulltimetable_path,
#                             min_supports, max_periods, 2, algo_mode, False)

In [None]:
# timetable, fulltimetable = measure_time(database_df, database_path,
#                             output_folder_path, fulltimetable_path,
#                             min_supports, max_periods, 1, 3 - algo_mode, False)

# Analyse time

In [None]:
# Reload the cumulative timing table of this dataset from disk.
fulltimefolder_path = f"../output_temp/{database_name}/TIME_V2/"
fulltimetable = pd.read_csv(fulltimefolder_path + 'times.csv')

In [None]:
# Distinct support and period values present in the recorded timings.
settings = { 
            'min_supports': fulltimetable['MinSup'].unique().tolist(),
            'max_periods': fulltimetable['MaxPer'].unique().tolist() 
            }
print(settings)

In [None]:
# Average every recorded column over the repeated runs of each
# (mode, min support, max period) combination.
grouped_time_groupby = fulltimetable.groupby(by=['Mode', 'MinSup', 'MaxPer'])
grouped_time_df = grouped_time_groupby.mean()

In [None]:
# Keep the item count and the mining time, and turn the group keys
# (Mode, MinSup, MaxPer) back into ordinary columns.
algo_runtime_df = grouped_time_df[ ['PFItemCount', 'upfp_growth'] ].reset_index()
algo_runtime_df

In [None]:
# sorted_by_pfitemcount_timedf = timetable.sort_values(by=['PFItemCount'],ignore_index=True)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Matplotlib format string per algorithm mode, indexed by the mode number
# (1 or 2); index 0 is only a placeholder.
mode_colors = [' ', 'bo-', 'ro-']

In [None]:
# One figure per min support: mining time against max period, one curve per mode.
minsup_output_path = os.path.join(fulltimefolder_path, 'minsups/')
os.makedirs(minsup_output_path, exist_ok=True)

for min_sup in settings['min_supports']:
    # Collect the points of every plotted mode so that the tick ranges cover
    # all curves, not only the last one drawn.
    plotted_pers, plotted_times = [], []
    # A dedicated loop name keeps the notebook-level `algo_mode` setting intact.
    for mode in range(1, 3):
        sub_df = algo_runtime_df[ (algo_runtime_df['Mode'] == mode) & (algo_runtime_df['MinSup'] == min_sup) ]
        if sub_df.empty:
            continue            # no timings recorded for this mode
        max_pers = sub_df['MaxPer'].to_numpy()
        times = sub_df['upfp_growth'].to_numpy()
        plt.plot(max_pers, times, mode_colors[mode], label='Mode'+str(mode))
        plotted_pers.append(max_pers)
        plotted_times.append(times)

    if not plotted_times:
        continue                # nothing to draw for this support

    all_pers = np.concatenate(plotted_pers)
    all_times = np.concatenate(plotted_times)

    plt.legend()
    plt.xlabel('Max periods')
    plt.ylabel('Time (sec)')
    plt.title('Time of algo by min support = ' + str(min_sup))

    plt.xscale('log', base=2)
    plt.yscale('log', base=2)
    # Ticks at the powers of two that enclose the plotted data.
    min_log_times, max_log_times = int(np.floor(np.min(np.log2(all_times)))), int(np.ceil(np.max(np.log2(all_times))))
    min_log_per, max_log_per = int(np.floor(np.min(np.log2(all_pers)))), int(np.ceil(np.max(np.log2(all_pers))))
    plt.xticks([ pow(2, p) for p in range(min_log_per, max_log_per + 1) ])
    plt.yticks([ pow(2, p) for p in range(min_log_times, max_log_times + 1) ])
    #plt.show()

    plt.savefig(os.path.join(minsup_output_path, 'minSup' + str(min_sup) + '.png'))
    plt.clf()

In [None]:
# One figure per max period: mining time against min support, one curve per mode.
maxper_output_path = os.path.join(fulltimefolder_path, 'maxpers/')
os.makedirs(maxper_output_path, exist_ok=True)

for max_per in settings['max_periods']:
    print(max_per)
    # Collect the points of every plotted mode so that the tick ranges cover
    # all curves, not only the last one drawn.
    plotted_sups, plotted_times = [], []
    # A dedicated loop name keeps the notebook-level `algo_mode` setting intact.
    for mode in range(1, 3):
        sub_df = algo_runtime_df[ (algo_runtime_df['Mode'] == mode) & (algo_runtime_df['MaxPer'] == max_per) ]
        if sub_df.empty:
            continue            # no timings recorded for this mode
        min_sups = sub_df['MinSup'].to_numpy()
        times = sub_df['upfp_growth'].to_numpy()
        plt.plot(min_sups, times, mode_colors[mode], label='Mode'+str(mode))
        plotted_sups.append(min_sups)
        plotted_times.append(times)

    if not plotted_times:
        continue                # nothing to draw for this period

    all_sups = np.concatenate(plotted_sups)
    all_times = np.concatenate(plotted_times)

    plt.legend()
    plt.xlabel('Min support')
    plt.ylabel('Time (sec)')

    plt.xscale('log', base=2)
    plt.yscale('log', base=2)

    # Ticks at the powers of two that enclose the plotted data.
    min_log_times, max_log_times = int(np.floor(np.min(np.log2(all_times)))), int(np.ceil(np.max(np.log2(all_times))))
    min_log_x, max_log_x = int(np.floor(np.min(np.log2(all_sups)))), int(np.ceil(np.max(np.log2(all_sups))))
    plt.xticks([ pow(2, p) for p in range(min_log_x, max_log_x + 1) ])
    plt.yticks([ pow(2, p) for p in range(min_log_times, max_log_times + 1) ])

    plt.title('Time of algo by max period = ' + str(max_per))
    #plt.show()
    plt.savefig(os.path.join(maxper_output_path, 'maxPer' + str(max_per) + '.png'))
    plt.clf()

In [None]:
# # 3d plot
# x_values = algo_runtime_df['MinSup'].to_numpy()
# y_values = algo_runtime_df['MaxPer'].to_numpy()
# z_values = algo_runtime_df['upfp_growth'].to_numpy()  
# fig = plt.figure(10) 
# ax = plt.axes(projection='3d')
# ax.plot_surface(x_values, y_values, z_values)
# plt.show()