# Import necessary libraries

In [35]:
import pandas as pd
from apyori import apriori
import ast
import datetime

# Variables for global usage and finalized output

In [36]:
# Notebook-wide accumulators filled by apriori_rg and read by the CSV export cell.
# The four lists are parallel: position k in each one describes the same rule.
all_consequents = []
all_antecedents = []
all_conf = []
all_lift = []

In [37]:
def _format_item_set(items):
    """Render an item set as comma-separated item reprs, e.g. ``'TEA', 'NAMKEENS'``.

    Items are emitted in the set's own iteration order; an empty set yields ``""``.
    """
    return ", ".join(repr(item) for item in items)


def apriori_rg(dataset, min_support_ = 0.01):
    """Mine association rules from one segment and keep the best rule per consequent.

    Runs ``apriori`` over ``dataset`` with the given minimum support, a minimum
    confidence of 0.7 and a minimum lift of 1.5. For every distinct consequent
    only the rule with the highest confidence is kept (the first one wins on a
    tie). The surviving rules are printed and merged into the module-level
    ``all_antecedents`` / ``all_consequents`` / ``all_conf`` / ``all_lift`` lists.

    Parameters
    ----------
    dataset : iterable
        One transaction per element; each transaction is a collection of items.
    min_support_ : float, default 0.01
        Minimum support passed to ``apriori`` as ``min_support``.

    Returns
    -------
    None
        Results are reported through ``print`` and the module-level lists.
    """
    transactions = list(dataset)
    rules = list(apriori(transactions, min_support = min_support_, min_confidence = 0.7, min_lift = 1.5))

    print(f"*** Total number of rules formulated: {len(rules)} ***")

    # consequent text -> (antecedent text, confidence in %, lift).
    # Re-assigning an existing key keeps its original position, so the print
    # order below is the order in which consequents were first encountered.
    best_by_consequent = {}
    for record in rules:
        # NOTE(review): only the first statistic in record[2] is inspected;
        # any further entries of that record are ignored - confirm this is intended.
        stat = record[2][0]
        consequent = _format_item_set(stat.items_add)
        confidence = round(stat.confidence * 100, 2)
        current = best_by_consequent.get(consequent)
        if current is None or confidence > current[1]:
            best_by_consequent[consequent] = (
                _format_item_set(stat.items_base),
                confidence,
                round(stat.lift, 2),
            )

    # Merge into the notebook-wide lists. The position must be looked up in
    # all_consequents itself: positions of this call's rules do not line up with
    # the global lists once more than one segment has been processed.
    for consequent, (antecedent, confidence, lift) in best_by_consequent.items():
        if consequent not in all_consequents:
            all_antecedents.append(antecedent)
            all_consequents.append(consequent)
            all_conf.append(confidence)
            all_lift.append(lift)
            continue
        slot = all_consequents.index(consequent)
        # Only replace a rule stored by an earlier call if this one is stronger.
        if confidence > all_conf[slot]:
            all_antecedents[slot] = antecedent
            all_conf[slot] = confidence
            all_lift[slot] = lift

    print(f"*** Total number of strong rules: {len(best_by_consequent)} ***")
    print("Market Basket Analysis Rules", end="\n\n")
    for consequent, (antecedent, confidence, lift) in best_by_consequent.items():
        print(f"Item set 1: {antecedent}\nItem set 2: {consequent}\nconfidence = {confidence}%\nlift = {lift}", end="\n\n")

In [38]:
# Interactive driver: asks how many customer segments to process, then for each
# segment asks for a dataset name and a support percentage and runs apriori_rg.
print("""The lower the support value, higher will be the number of rules and unique rules generated.
the higher the support value, lesser will be the rules generated but will be more quality rules.
Press enter to go with default settings.""")
loop = int(input("\nEnter number of customer segments for rule generation: "))
for segment_no in range(loop):
    path = input("Enter dataset path: ")
    raw_support = input("\nEnter desired support value (default 1%): ")

    # Guard ONLY the parsing of the user's number. Errors raised while loading
    # the CSV or mining rules must surface instead of being reported as
    # "default support" and triggering a second full run.
    try:
        user_support = float(raw_support) / 100
        if not user_support > 0:
            # Zero, negative and NaN are not usable thresholds; use the default.
            raise ValueError("support must be a positive percentage")
    except ValueError:
        user_support = None

    # First column of the CSV, skipping its first row; each cell holds a Python
    # literal that ast.literal_eval turns back into a transaction.
    transactions = pd.read_csv(f'./{path}.csv', header = None)[0][1:].apply(ast.literal_eval)

    if user_support is None:
        print("\nRules will be based on support of 1%")
        apriori_rg(transactions)
    else:
        print(f"\nRules will be based on support of {user_support*100}%")
        apriori_rg(transactions, user_support)

The lower the support value, higher will be the number of rules and unique rules generated.
the higher the support value, lesser will be the rules generated but will be more quality rules.
Press enter to go with default settings.

Enter number of customer segments for rule generation: 3
Enter dataset path: elite_transactions

Enter desired support value (default 1%): 2

Rules will be based on support of 2.0%
*** Total number of rules formulated: 43 ***
*** Total number of strong rules: 3 ***
Market Basket Analysis Rules

Item set 1: 'BISCUITS', 'COLD & FROZEN FOODS', 'BAKERY ITEMS', 'FLOOR CLEANERS'
Item set 2: 'DAIRY'
confidence = 79.66%
lift = 1.65

Item set 1: 'DETERGENTS', 'DISPOSABLES/ PARTY SUPPLIES'
Item set 2: 'FLOOR CLEANERS'
confidence = 72.92%
lift = 1.68

Item set 1: 'TEA', 'NAMKEENS'
Item set 2: 'BISCUITS'
confidence = 73.91%
lift = 2.92

Enter dataset path: freq_transactions

Enter desired support value (default 1%): 2

Rules will be based on support of 2.0%
*** Total num

# Model artifact naming convention generation

In [39]:
# Build the output file stem: "<YYYY-MM-DD>--<HH-MM>_MBA_rules".
timestamp = datetime.datetime.now()
dt_now = str(timestamp)
list_dt = dt_now.split()

cur_date = timestamp.date().isoformat()
# Hours and minutes only, dash-separated so the name contains no ':'.
curr_time = timestamp.strftime("%H-%M")

name_conv = f"{cur_date}--{curr_time}_MBA_rules"

# CSV conversion (OUTPUT)

In [40]:
# One row per accumulated rule: [antecedent, consequent, confidence, lift].
csv_conv = [
    [all_antecedents[row], all_consequents[row], all_conf[row], all_lift[row]]
    for row in range(len(all_consequents))
]
df = pd.DataFrame(csv_conv, columns=['Antecedent', 'Consequent', 'Confidence','Lift'])
# Written next to the notebook as "<name_conv>.csv".
df.to_csv(f'{name_conv}.csv')