In [None]:
import pandas as pd
from mining_utils import mining_utils
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
from joblib import Parallel, delayed
import os
from pathlib import Path

## Load transactions (converted from [dataset](https://github.com/msr-fiddle/philly-traces))

In [None]:
# Load the transaction table. The 'Unnamed: 0' column is dropped so it is not
# treated as an item; it looks like a row index written out by an earlier
# DataFrame.to_csv call -- TODO confirm against the conversion script.
df = pd.read_csv('transaction_philly.csv').drop(columns='Unnamed: 0')

# Make sure the directory the rule CSVs are written to exists
# (parents are created as needed; no error if it is already there).
Path("rules/philly").mkdir(parents=True, exist_ok=True)

## Generate frequent itemsets

In [None]:
# Mine frequent itemsets from the transaction table with FP-Growth.
itemsets = fpgrowth(
    df,
    min_support=0.05,   # keep itemsets whose support is at least 0.05
    use_colnames=True,  # report items by column name instead of column index
    max_len=4,          # cap itemset size at 4 items
)
itemsets

## Generate association rules
As the output shows, a large number of rules are generated, which makes it difficult to focus on the ones of interest.

In [None]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose lift meets the 1.5 threshold.
rules = association_rules(
    itemsets,
    metric='lift',
    min_threshold=1.5,
)
rules

## Generate low utilization rules

In [None]:
# Item name used to select the low-utilization rules; the pruning cell that
# follows reuses `keyword`, so it must stay defined at notebook level.
keyword = "mean_util_low"

# NOTE(review): gen_rule is a project helper; presumably it returns the rules
# with `keyword` in the antecedent and those with it in the consequent --
# confirm against mining_utils.
ant_rule, cons_rule = mining_utils.gen_rule(
    rules,
    keyword,
)

## Prune rules and save

In [None]:
# Prune the low-utilization rules, then persist both resulting tables as CSV.
# NOTE(review): the results are unpacked as (cons_prune, ant_prune) while the
# arguments are passed as (ant_rule, cons_rule) -- confirm that prune_rule
# really returns the consequent table first.
# NOTE(review): the two 1.5 arguments are presumably pruning thresholds --
# confirm their meaning against mining_utils.prune_rule.
cons_prune, ant_prune = mining_utils.prune_rule(
    ant_rule,
    cons_rule,
    keyword,
    1.5,
    1.5,
)

ant_prune.to_csv('rules/philly/antecedent_underutilize.csv')
cons_prune.to_csv('rules/philly/consequent_underutilize.csv')

## Generate job failure rules

In [None]:
# Item name used to select the job-failure rules; the pruning cell that
# follows reuses `keyword`, so it must stay defined at notebook level.
keyword = 'Failed'

# NOTE(review): gen_rule is a project helper; presumably it returns the rules
# with `keyword` in the antecedent and those with it in the consequent --
# confirm against mining_utils.
ant_rule, cons_rule = mining_utils.gen_rule(
    rules,
    keyword,
)

## Prune rules and save

In [None]:
# Prune the job-failure rules, then persist both resulting tables as CSV.
# NOTE(review): the results are unpacked as (cons_prune, ant_prune) while the
# arguments are passed as (ant_rule, cons_rule) -- confirm that prune_rule
# really returns the consequent table first.
# NOTE(review): the two 1.5 arguments are presumably pruning thresholds --
# confirm their meaning against mining_utils.prune_rule.
cons_prune, ant_prune = mining_utils.prune_rule(
    ant_rule,
    cons_rule,
    keyword,
    1.5,
    1.5,
)

ant_prune.to_csv('rules/philly/antecedent_fail.csv')
cons_prune.to_csv('rules/philly/consequent_fail.csv')