In [15]:
import pandas as pd
import numpy as np


import mlxtend
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [5]:
# Note for Users:
# Please ensure that your dataset is formatted as a list of items, 
# where each transaction is represented as a comma-separated string of items. 
# For example:
# olive oil, frozen smoothie, green tea, whole wheat flour, salmon

# Each transaction should be on a new line in the input file. 
# This format is required for the FP-Growth and Apriori algorithms to function correctly.


# Reading the Dataset
df = pd.read_csv("Example_Dataset.csv", header=None)
df

# Pre-Processing the Data

In [12]:
preprocessed_df = df.copy()

# Creating a new column which contains all the different items as a list
list_of_items_col = 'List of Items'
if list_of_items_col not in preprocessed_df.columns:
    preprocessed_df[list_of_items_col] = preprocessed_df.apply(lambda x: x.dropna().tolist(), axis = 1)

# Dropping all other columns
preprocessed_df = preprocessed_df[[list_of_items_col]]
# preprocessed_df


# Encoding 
transactions = preprocessed_df[list_of_items_col].tolist()
transaction_encode = TransactionEncoder()
transaction_encode.fit(transactions)

encoded_transactions = transaction_encode.transform(transactions)

preprocessed_df = pd.DataFrame(encoded_transactions, columns=transaction_encode.columns_)

In [13]:

# Note:
# The 'preprocessed_df' DataFrame now contains the encoded transactions where each item is represented as a column header.
# Each row indicates the presence (True) or absence (False) of items in that specific transactions.
# For example, the structure will look like this:
#
# avocado   honey   mineral water   frozen smoothie   eggs   spinach   burgers
# 0    True    False         True            False      True     False      True
# 1   False     True        False             True     False      True     False

preprocessed_df

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


# Implementing Apriori Algorithm

## Support

In [20]:
minimum_support_value = input("Input the Minimum Support Count: ")

frequent_items = apriori(preprocessed_df, min_support = float(minimum_support_value), use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.033329,(avocado)
2,0.010799,(barbecue sauce)
3,0.014265,(black tea)
4,0.011465,(body spray)
...,...,...
252,0.011065,"(milk, mineral water, ground beef)"
253,0.017064,"(spaghetti, mineral water, ground beef)"
254,0.015731,"(milk, spaghetti, mineral water)"
255,0.010265,"(olive oil, mineral water, spaghetti)"


## Confidence

In [None]:
confidence_threshold = input("Input the Confidence Threshold")

confidence = association_rules(frequent_items, min_threshold = float(confidence_threshold))