# Importing everything we need

In [1]:
from mlxtend.preprocessing import TransactionEncoder    # For transactions processing
from mlxtend.frequent_patterns import apriori, association_rules   # Functions for finding out association rules
import pandas as pd   # Pandas for dataframes
from csv import reader   # For csv files processing

# Reading the CSV file into a list of transactions for convenience

In [2]:
# Dataset: list of transactions in csv format
def csv_to_stacklist(file):
    """Read a CSV file and return its rows as a list of lists of strings.

    Each CSV row becomes one inner list, with one string per field.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list[list[str]]
        All rows of the file, in file order.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded inside quoted
    # fields are translated and the field contents are altered.
    with open(file, newline='') as read_obj:
        dataset = list(reader(read_obj))
    return dataset

# Creating a one-hot encoded dataframe from the transaction list with TransactionEncoder

In [3]:
def create_logical_df(dst):
    """Encode a list of transactions as a DataFrame via TransactionEncoder.

    The encoder is fitted on ``dst`` and then used to transform the same
    data; the resulting array is wrapped in a DataFrame whose column
    labels are the encoder's ``columns_``.
    """
    encoder = TransactionEncoder()
    encoder.fit(dst)
    encoded = encoder.transform(dst)
    return pd.DataFrame(encoded, columns=encoder.columns_)

# Setting up parameters for learning

In [4]:
# Input file with the transactions (read by csv_to_stacklist below)
path = 'groceries.csv'

# Thresholds used by the mining steps further down
lift = 1.7           # rules are printed only when their lift is >= this value
confidence = 0.25    # min_threshold for the "confidence" metric in association_rules
support = 0.02       # min_support passed to apriori

# Putting it together: loading the transactions and encoding them

In [5]:
# Raw transactions: one list of strings per CSV row
dst = csv_to_stacklist(file=path)
# Encoded dataframe that is passed to apriori in the next step
df = create_logical_df(dst=dst)

# Finding frequent itemsets from dataframe

In [6]:
# Mine the itemsets whose support reaches the configured minimum;
# use_colnames=True reports them by column label (see the output below).
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=support,
)
print(frequent_itemsets)

      support                                         itemsets
0    0.033452                                       (UHT-milk)
1    0.052466                                           (beef)
2    0.033249                                        (berries)
3    0.026029                                      (beverages)
4    0.080529                                   (bottled beer)
..        ...                                              ...
117  0.032232                 (whole milk, whipped/sour cream)
118  0.020742                     (yogurt, whipped/sour cream)
119  0.056024                             (yogurt, whole milk)
120  0.023183  (other vegetables, root vegetables, whole milk)
121  0.022267           (other vegetables, yogurt, whole milk)

[122 rows x 2 columns]


# Finding association rules out of itemsets

In [7]:
# Derive rules from the frequent itemsets, thresholded on confidence,
# and keep only the columns needed for the report.
res = association_rules(
    frequent_itemsets,
    min_threshold=confidence,
    metric="confidence",
).loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

# Print every rule whose lift is at least the configured threshold.
print(res.loc[res['lift'] >= lift].to_string())

                            antecedents         consequents   support  confidence      lift
5                              (butter)  (other vegetables)  0.020031    0.361468  1.868122
6                              (butter)        (whole milk)  0.027555    0.497248  1.946053
7                        (citrus fruit)  (other vegetables)  0.028876    0.348894  1.803140
9                        (citrus fruit)            (yogurt)  0.021657    0.261671  1.875752
10                               (curd)        (whole milk)  0.026131    0.490458  1.919481
11                      (domestic eggs)  (other vegetables)  0.022267    0.350962  1.813824
12                      (domestic eggs)        (whole milk)  0.029995    0.472756  1.850203
20                          (pip fruit)  (other vegetables)  0.026131    0.345430  1.785237
21                               (pork)  (other vegetables)  0.021657    0.375661  1.941476
22                    (root vegetables)  (other vegetables)  0.047382    0.43470