### Imports Required Libraries

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/ahmud-z/data-mining-lab/refs/heads/main/report-03/transactions.csv")
df.head(2)

Unnamed: 0,Transaction,Item1,Item2,Item3,Item4,Item5
0,T1,A,B,C,D,E
1,T2,A,C,D,E,F


### Print column names with total empty/NaN value counts

In [3]:
print('Columns  -  Total Empty')
print('----------------------------')
print(df.isnull().sum())

Columns  -  Total Empty
----------------------------
Transaction    0
Item1          0
Item2          0
Item3          0
Item4          1
Item5          1
dtype: int64


### Replaces 'NaN' values with empty string

In [4]:
df = df.fillna("")
df.head(2)

Unnamed: 0,Transaction,Item1,Item2,Item3,Item4,Item5
0,T1,A,B,C,D,E
1,T2,A,C,D,E,F


### Encode the dataset into true/false values using tansaction encoder

In [5]:
te = TransactionEncoder()
encoded_array = te.fit_transform(df.values.tolist())

### Convert the encoded array into pandas dataframe

In [6]:
transaction_df = pd.DataFrame(encoded_array, columns=te.columns_)
transaction_df.head(2)

Unnamed: 0,Unnamed: 1,A,B,C,D,E,F,G,H,T1,T2,T3,T4,T5
0,False,True,True,True,True,True,False,False,False,True,False,False,False,False
1,False,True,False,True,True,True,True,False,False,False,True,False,False,False


### Extract frequent items by using fp-growth algorithm

In [7]:
frequent_items = fpgrowth(transaction_df, min_support=0.6, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.8,(E)
1,0.8,(C)
2,0.8,(A)
3,0.6,(D)
4,0.6,(B)
5,0.6,(F)
6,0.6,"(C, E)"
7,0.6,"(C, A)"
8,0.6,"(A, E)"
9,0.6,"(D, C)"


### Genreate association rules for frequent items

In [8]:
rules = association_rules(frequent_items, min_threshold=0.8)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(D),(C),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
1,(D),(E),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
2,"(D, C)",(E),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
3,"(D, E)",(C),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
4,"(C, E)",(D),0.6,0.6,0.6,1.0,1.666667,1.0,0.24,inf,1.0,1.0,1.0,1.0
5,(D),"(C, E)",0.6,0.6,0.6,1.0,1.666667,1.0,0.24,inf,1.0,1.0,1.0,1.0
6,(B),(E),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
7,(F),(C),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875


### Print association rules with necessary columns

In [9]:
rules[["antecedents", "consequents", "support", "confidence"]]

Unnamed: 0,antecedents,consequents,support,confidence
0,(D),(C),0.6,1.0
1,(D),(E),0.6,1.0
2,"(D, C)",(E),0.6,1.0
3,"(D, E)",(C),0.6,1.0
4,"(C, E)",(D),0.6,1.0
5,(D),"(C, E)",0.6,1.0
6,(B),(E),0.6,1.0
7,(F),(C),0.6,1.0
