# Frequent Pattern Mining

In [3]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.19.0-py2.py3-none-any.whl (1.3 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.19.0


In [4]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [5]:
# Toy transaction database: one inner list per transaction, each holding the
# labels of the items that occur together in that transaction.
dataset = [
    ['A', 'C', 'D'],
    ['B', 'C', 'E'],
    ['A', 'B', 'C', 'E'],
    ['B', 'E'],
]
dataset

[['A', 'C', 'D'], ['B', 'C', 'E'], ['A', 'B', 'C', 'E'], ['B', 'E']]

In [7]:
# One-hot encode the transactions: the encoder learns the distinct item labels
# and produces one boolean column per item, one row per transaction.
te = TransactionEncoder()
te_array = te.fit_transform(dataset)
df = pd.DataFrame(te_array, columns=te.columns_)
df

Unnamed: 0,A,B,C,D,E
0,True,False,True,True,False
1,False,True,True,False,True
2,True,True,True,False,True
3,False,True,False,False,True


## Apriori

In [8]:
from mlxtend.frequent_patterns import apriori

In [12]:
# An itemset is frequent when it occurs in at least 2 transactions. apriori
# takes that threshold as a fraction of all transactions, so derive it from the
# data instead of hard-coding the result (2 / 4 == 0.5 for this dataset).
minimum_support = 2 / len(dataset)

# use_colnames=True reports itemsets by item label rather than column index.
frequent_items = apriori(df, min_support=minimum_support, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.5,(A)
1,0.75,(B)
2,0.75,(C)
3,0.75,(E)
4,0.5,"(C, A)"
5,0.5,"(B, C)"
6,0.75,"(B, E)"
7,0.5,"(C, E)"
8,0.5,"(B, C, E)"


In [15]:
# Record how many items each frequent itemset contains in a new 'length' column.
frequent_items['length'] = frequent_items['itemsets'].apply(len)
frequent_items

Unnamed: 0,support,itemsets,length
0,0.5,(A),1
1,0.75,(B),1
2,0.75,(C),1
3,0.75,(E),1
4,0.5,"(C, A)",2
5,0.5,"(B, C)",2
6,0.75,"(B, E)",2
7,0.5,"(C, E)",2
8,0.5,"(B, C, E)",3


In [16]:
# Show only the itemsets that contain more than two items.
frequent_items.loc[frequent_items['length'] > 2]

Unnamed: 0,support,itemsets,length
8,0.5,"(B, C, E)",3


In [23]:
# Combine two row conditions: more than two items AND support at or above
# the chosen minimum support.
frequent_items.loc[
    (frequent_items['length'] > 2)
    & (frequent_items['support'] >= minimum_support)
]

Unnamed: 0,support,itemsets,length
8,0.5,"(B, C, E)",3


## FP-Growth

In [20]:
from mlxtend.frequent_patterns import fpgrowth

# An itemset is frequent when it occurs in at least 2 transactions. fpgrowth
# takes that threshold as a fraction of all transactions, so derive it from the
# data instead of hard-coding the result (2 / 4 == 0.5 for this dataset).
minimum_support = 2 / len(dataset)

# Mine the same one-hot frame with FP-Growth; itemsets are reported by label.
frequent_items = fpgrowth(df, min_support=minimum_support, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.75,(C)
1,0.5,(A)
2,0.75,(E)
3,0.75,(B)
4,0.5,"(C, E)"
5,0.5,"(C, A)"
6,0.75,"(B, E)"
7,0.5,"(B, C)"
8,0.5,"(B, C, E)"


In [24]:
# Record the size of every itemset, then keep only the itemsets with more than
# two items whose support is at or above the minimum support.
# (A bare `frequent_items` line in the middle of the cell was removed: only the
# last expression of a cell is displayed, so it had no effect.)
frequent_items['length'] = frequent_items['itemsets'].apply(len)
frequent_items[(frequent_items['length'] > 2) & (frequent_items['support'] >= minimum_support)]

Unnamed: 0,support,itemsets,length
8,0.5,"(B, C, E)",3


# EXERCISE

In [1]:
# Exercise transaction database: nine transactions over the items I1..I5,
# one inner list per transaction.
exercise_data = [
    ['I1', 'I2', 'I5'],
    ['I2', 'I4'],
    ['I2', 'I3'],
    ['I1', 'I2', 'I4'],
    ['I1', 'I3'],
    ['I2', 'I3'],
    ['I1', 'I3'],
    ['I1', 'I2', 'I3', 'I5'],
    ['I1', 'I2', 'I3'],
]
exercise_data

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3']]

In [11]:
# Number of transactions in the exercise data; this is the denominator used
# when a minimum count is turned into a relative minimum support.
len(exercise_data)

9

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

In [3]:
# One-hot encode the exercise transactions: one boolean column per distinct
# item, one row per transaction. Note that this rebinds `te`, `te_array`, `df`.
te = TransactionEncoder()
te_array = te.fit_transform(exercise_data)
df = pd.DataFrame(te_array, columns=te.columns_)
df

Unnamed: 0,I1,I2,I3,I4,I5
0,True,True,False,False,True
1,False,True,False,True,False
2,False,True,True,False,False
3,True,True,False,True,False
4,True,False,True,False,False
5,False,True,True,False,False
6,True,False,True,False,False
7,True,True,True,False,True
8,True,True,True,False,False


## Apriori

In [10]:
# Require an itemset to occur in at least 2 transactions, expressed as the
# fraction of all transactions that apriori takes for `min_support`.
minimum_support = 2 / len(exercise_data)

# use_colnames=True reports itemsets by item label rather than column index.
frequent_items = apriori(
    df,
    min_support=minimum_support,
    use_colnames=True,
)
frequent_items

Unnamed: 0,support,itemsets
0,0.666667,(I1)
1,0.777778,(I2)
2,0.666667,(I3)
3,0.222222,(I4)
4,0.222222,(I5)
5,0.444444,"(I2, I1)"
6,0.444444,"(I3, I1)"
7,0.222222,"(I5, I1)"
8,0.444444,"(I2, I3)"
9,0.222222,"(I4, I2)"


In [5]:
# Record how many items each frequent itemset contains in a new 'length' column.
frequent_items['length'] = frequent_items['itemsets'].apply(len)
frequent_items

Unnamed: 0,support,itemsets,length
0,0.666667,(I1),1
1,0.777778,(I2),1
2,0.666667,(I3),1
3,0.222222,(I4),1
4,0.222222,(I5),1
5,0.444444,"(I2, I1)",2
6,0.444444,"(I3, I1)",2
7,0.222222,"(I5, I1)",2
8,0.444444,"(I2, I3)",2
9,0.222222,"(I4, I2)",2


In [6]:
# Keep the itemsets with more than two items whose support is at or above
# the minimum support.
frequent_items.loc[
    (frequent_items['length'] > 2)
    & (frequent_items['support'] >= minimum_support)
]

Unnamed: 0,support,itemsets,length
11,0.222222,"(I2, I3, I1)",3
12,0.222222,"(I5, I2, I1)",3


## FP-Growth

In [12]:
from mlxtend.frequent_patterns import fpgrowth

# Require an itemset to occur in at least 2 transactions, expressed as the
# fraction of all transactions that fpgrowth takes for `min_support`.
minimum_support = 2 / len(exercise_data)

# Mine the one-hot exercise frame with FP-Growth; itemsets are reported by label.
frequent_items = fpgrowth(
    df,
    min_support=minimum_support,
    use_colnames=True,
)
frequent_items

Unnamed: 0,support,itemsets
0,0.777778,(I2)
1,0.666667,(I1)
2,0.222222,(I5)
3,0.222222,(I4)
4,0.666667,(I3)
5,0.444444,"(I2, I1)"
6,0.444444,"(I3, I1)"
7,0.222222,"(I2, I3, I1)"
8,0.222222,"(I5, I1)"
9,0.222222,"(I5, I2)"


In [8]:
# Record how many items each FP-Growth itemset contains in a new 'length' column.
frequent_items['length'] = frequent_items['itemsets'].apply(len)
frequent_items

Unnamed: 0,support,itemsets,length
0,0.777778,(I2),1
1,0.666667,(I1),1
2,0.222222,(I5),1
3,0.222222,(I4),1
4,0.666667,(I3),1
5,0.444444,"(I2, I1)",2
6,0.444444,"(I3, I1)",2
7,0.222222,"(I2, I3, I1)",3
8,0.222222,"(I5, I1)",2
9,0.222222,"(I5, I2)",2


In [9]:
# Keep the itemsets with more than two items whose support is at or above
# the minimum support.
frequent_items.loc[
    (frequent_items['length'] > 2)
    & (frequent_items['support'] >= minimum_support)
]

Unnamed: 0,support,itemsets,length
7,0.222222,"(I2, I3, I1)",3
10,0.222222,"(I5, I2, I1)",3
