In [1]:
import fim
import pandas as pd
import numpy as np

In [263]:
transactions = [ [1,2,3,100],
  [1,3],
  [1,2,100,23], 
  [1,2,100,24], 
  [1,2,100,24], 
  [1,2,100,23], 
  [5,7,8]
]

In [264]:
def parse_as_df_bhascl(rule_list, report='bhascl'):
    """Turn the rule tuples returned by ``fim.arules`` into a readable DataFrame.

    Parameters
    ----------
    rule_list : iterable of tuple
        One tuple per rule, laid out as ``(head, body, *values)`` where
        ``values`` holds one entry per character of ``report``, in order.
    report : str, default 'bhascl'
        The ``report`` string that was passed to ``fim.arules``. Each
        character names one value column of the rule tuples.

    Returns
    -------
    pandas.DataFrame
        Columns ``body``, ``head``, ``arules`` (a ``"{body} => head"`` label)
        followed by one column per ``report`` character. The characters
        ``b, h, a, s, c, l`` are renamed to descriptive names. Rows are sorted
        by descending confidence when a ``confidence`` column is present.
    """
    # Materialise once so generators work and we never fall back on a global.
    rule_list = list(rule_list)
    report_cols = list(report)

    df = pd.DataFrame(rule_list, columns=['head', 'body', *report_cols])
    df['arules'] = [f"{set(r[1])} => {r[0]}" for r in rule_list]
    df = df[['body', 'head', 'arules', *report_cols]]

    df = df.rename(columns={'b': 'body_counts',
                            'h': 'head_counts',
                            'a': 'body_&_head_counts',
                            's': 'support',
                            'c': 'confidence',
                            'l': 'lift'})

    if 'confidence' in df.columns:
        # A stable sort keeps the original rule order among equal confidences,
        # so frames built from equivalent inputs compare equal.
        df = df.sort_values('confidence', ascending=False, kind='mergesort')
    return df

## Finding rules from a list of transactions


The documentation of the package can be found [here](https://borgelt.net/pyfim.html).

Documentation of how to pass inputs to `apriori` can be found [here](https://borgelt.net/doc/apriori/apriori.html).


Some takeaways:

- **`supp`**: minimum support of a rule, default is 10. Positive: percentage, Negative: absolute number. 
- **`zmin`**: minimum number of items per rule (default: 1)
- **`zmax`**: maximum number of items per rule (default: no limit)


In [284]:
report_str = 'bhascl'

rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=-5, conf=35)
df_rules = parse_as_df_bhascl(rules)

In [285]:
df_rules

Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


## Finding rules from a list of transactions with restrictions

We want to mine only the rules that have certain items in the right-hand side (head), instead of mining all rules first and then filtering out the ones whose right-hand side we are not interested in.


Note that the item base (the set of all considered items) is often not given explicitly, but only implicitly as the union of all given transactions. However, it is also possible to specify the item base explicitly with an optional item appearances file. This can be useful, for example, if one wants to restrict the analysis to a subset of all items. It can also be used to specify that certain items should only appear in the antecedents or only in the consequents of reported association rules. To do this we can use the **`appear`** argument in `fim.arules`.


```
appear  dictionary mapping items to item appearance indicators,
            with the key None referring to the default item appearance.
            (If None does not occur as a key or no dictionary is given,
            the default item appearance indicator is 'both'.)
            * item may not appear anywhere in a rule:
              '-', 'n', 'none', 'neither', 'ignore'
            * item may appear only in rule body/antecedent:
              'i', 'in', 'inp', 'input', 'b', 'body',
              'a', 'ante', 'antecedent'
            * item may appear only in rule head/consequent:
              'o', 'out',      'output', 'h', 'head',
              'c', 'cons', 'consequent'
            * item may appear anywhere in a rule:
              'io', 'i&o', 'inout', 'in&out', 'bh', 'b&h', 'both'
```              



Consider the case where one wants to build only rules in which the head/consequent is a label. Then one can use the `appear` argument to control where each item is allowed to occur, as the examples below show.

#####  Item may not appear anywhere in a rule with   `appear = { item_1: 'none', item_2: 'none', ...}`

Let us look at a couple of examples that restrict where items may appear: in the body, in the head, or nowhere at all.

```
    * item may not appear anywhere in a rule:
      '-', 'n', 'none', 'neither', 'ignore'
```

In [286]:
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
df_rules = parse_as_df_bhascl(rules)

# remove any rules with item `1` in either body or head
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={1:'n'})
df_rules_no_1 = parse_as_df_bhascl(rules)

In [310]:
print('\nRules (given conf and supp) with no 1 in body or head')
display(df_rules_no_1)

print('\nAll rules (given conf and supp)')
display(df_rules)


Rules (given conf and supp) with no 1 in body or head


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
1,"(100, 2)",24,"{2, 100} => 24",5,2,2,0.285714,0.4,1.4



All rules (given conf and supp)


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


#####  Item may appear only in rule body  `appear = { item_1: 'b', item_2: 'b', ...}` (item can't be in the head)

Remove all rules with head containing a list of items using  `appear = { item_1: 'b', item_2: 'b', ...}` (because if items appear they have to be in the rule body).

```
    * item may appear only in rule body/antecedent:
      'i', 'in', 'inp', 'input', 'b', 'body',
      'a', 'ante', 'antecedent'
```



In [437]:
print('\nAll rules (given conf and supp) where, 1, 24, 23 cannot be in the head')
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={24:'b', 23:'b', 1:'b'})
df_rules_restricted = parse_as_df_bhascl(rules)
display(df_rules_restricted)

print('\nAll rules (given conf and supp)')
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
df_rules = parse_as_df_bhascl(rules)
display(df_rules)


All rules (given conf and supp) where, 1, 24, 23 cannot be in the head


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
1,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4



All rules (given conf and supp)


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


What this example shows is that, if we know all the items that we want to allow in the body of a rule (but NEVER in the head), we can pass them through `appear` as a dict that maps each of those items to `'b'`.

#### Item may appear only in rule head/consequent with  `appear = { item_1: 'h', item_2: 'h', ...}`


```
    * item may appear only in rule head/consequent:
      'o', 'out',      'output', 'h', 'head',
      'c', 'cons', 'consequent'
```

⚠️ This does not work as expected: in the example below item 100 is restricted to the head, yet there are rules such as `{1, 100} => 2` with 100 in the body.



In [436]:
# Restrict item 100 to the rule head, then show the unrestricted rules for comparison.
print('\nAll rules (given conf and supp) where 100, if present, should only be in the head')
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={100:'head'})
df_rules_restricted = parse_as_df_bhascl(rules)
display(df_rules_restricted)

# Baseline: same thresholds, no appearance restriction.
print('\nAll rules (given conf and supp)')
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
df_rules = parse_as_df_bhascl(rules)
display(df_rules)


All rules (given conf and supp) where, if 24 or 23 present, it is in the body


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(2, 100)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
2,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
3,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
4,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
5,"(2, 100, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(2, 100)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
8,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
9,"(2, 100, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4



All rules (given conf and supp)


Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


## Finding rules from data containing multiplicity/counts

If we happen to have our data grouped by counts we can also do rule mining and consider transactions with different weights (counts). To do this, we need the data to be stored as a dict where keys are transactions and values are multiplicities of the transaction.

Let us see that we get the same results as before if we input transactions with counts

In [15]:
from collections import Counter
transactions_dict = Counter([tuple(x) for x in  transactions])
transactions_dict

Counter({(1, 2, 100, 23): 2,
         (1, 2, 100, 24): 2,
         (1, 2, 3, 100): 1,
         (1, 3): 1,
         (5, 7, 8): 1})

In [456]:
rules = fim.arules(transactions_dict, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
df_rules_from_counts = parse_as_df_bhascl(rules)

In [457]:
df_rules_from_counts

Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


Let us test that the rules extracted from counts are the same as the rules extracted from raw transactions

In [405]:
pd.testing.assert_frame_equal(df_rules_from_counts, df_rules)

In [406]:
df_rules_from_counts

Unnamed: 0,body,head,arules,body_counts,head_counts,body_&_head_counts,support,confidence,lift
0,"(100, 2)",1,"{2, 100} => 1",5,6,5,0.714286,1.0,1.166667
1,"(100, 1)",2,"{1, 100} => 2",5,5,5,0.714286,1.0,1.4
2,"(2, 1)",100,"{1, 2} => 100",5,5,5,0.714286,1.0,1.4
3,"(2, 1)",23,"{1, 2} => 23",5,2,2,0.285714,0.4,1.4
4,"(100, 1)",23,"{1, 100} => 23",5,2,2,0.285714,0.4,1.4
5,"(100, 2, 1)",23,"{1, 2, 100} => 23",5,2,2,0.285714,0.4,1.4
6,"(100, 2)",23,"{2, 100} => 23",5,2,2,0.285714,0.4,1.4
7,"(2, 1)",24,"{1, 2} => 24",5,2,2,0.285714,0.4,1.4
8,"(100, 1)",24,"{1, 100} => 24",5,2,2,0.285714,0.4,1.4
9,"(100, 2, 1)",24,"{1, 2, 100} => 24",5,2,2,0.285714,0.4,1.4


### MWE to showcase appear does not work as expected for `head`

In [401]:
transactions = [ [1,2,3,100],
  [1,3],
  [1,2,100,23], 
  [1,2,100,24], 
  [1,2,100,24], 
  [1,2,100,23], 
  [5,7,8]
]

In [404]:
report_str = 'bhascl'
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
rules

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4)]

In [407]:
# RESULTS AS EXPECTED: You can't have rules with 24 in the head (if it appears 24 it has to be in the body)
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={24:'b'})
rules

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4)]

In [408]:
# RESULT AS EXPECTED: You can't have rules with 24 or 23 in the head (if it appears 23 or 24 it has to be in the body)
# If you want to forbid certain items [i1,i2,i3...] in the head use "appear={i1:'b', i2:'b', i3:'b' }"
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={24:'b', 23:'b'})
rules

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4)]

In [468]:
# NOTE: 'b' restricts item 100 to the rule *body*, so this call does not
# exercise the head restriction. The output below is consistent with 'b':
# 100 shows up only in bodies and the rule with 100 as head is gone.
# To probe the head/consequent restriction for 100, pass appear={100: 'h'}.
rules = fim.arules(
    transactions,
    report=report_str,
    zmin=3,
    zmax=-1,
    supp=50,
    conf=30,
    appear={100: 'b'},
)
rules

[(1, (2, 100), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (2, 100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (2, 100), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 100), 5, 2, 2, 0.2857142857142857, 0.4, 1.4)]

In [471]:
# Restrict item 23 to the rule head ('h').
# In the output below 23 appears only as a head, and the rule list is the
# same as the unrestricted run, where 23 was never part of a body either.
rules = fim.arules(
    transactions,
    report=report_str,
    zmin=3,
    zmax=-1,
    supp=50,
    conf=30,
    appear={23: 'h'},
)
rules

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4)]

### MWE unexpected behaviour using `appear={100:'head'}`

In [463]:
import pprint 
import fim 

transactions = [ [1,2,3,100],
  [1,3],
  [1,2,100,23], 
  [1,2,100,24], 
  [1,2,100,24], 
  [1,2,100,23], 
  [5,7,8]
]

report_str = 'bhascl'
print("\nOriginal rules without restriction (beyond supp and conf)\n")
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30)
pprint.pprint(rules)


print("\nRules restricting 24 to the body (works as expected, removing rules with 24 in the head)\n")
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={24:'body'})
pprint.pprint(rules)


print("\nRules restricting 100 to the head (here I can see rules with item 100 in the body, the documentation says item may appear only in rule head/consequent)\n")
rules = fim.arules(transactions, report=report_str, zmin=3, zmax=-1, supp=50, conf=30, appear={100:'head'})
pprint.pprint(rules)



Original rules without retriction (beyond supp and conf)

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (23, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2, 1), 5, 2, 2, 0.2857142857142857, 0.4, 1.4),
 (24, (100, 2), 5, 2, 2, 0.2857142857142857, 0.4, 1.4)]

Rules restricting 24 to the body (works as expected, removing rules with 24 in the head)

[(1, (100, 2), 5, 6, 5, 0.7142857142857143, 1.0, 1.1666666666666667),
 (2, (100, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (100, (2, 1), 5, 5, 5, 0.7142857142857143, 1.0, 1.4),
 (23, (2, 1), 5, 2, 2, 0.285714285714285