In [4]:
import numpy as np
import pandas as pd
from apyori import apriori
from mlxtend.frequent_patterns import apriori, association_rules
import itertools

In [9]:
# Load the basket matrix: per the df.head() preview below, each row is a
# transaction and each column is an item flagged 0/1.
df = pd.read_csv("50data.csv")

In [10]:
# (number of transactions, number of item columns)
df.shape

(51, 113)

In [11]:
# Preview the first transactions to confirm the 0/1 item encoding.
df.head()

Unnamed: 0,almonds,antioxydant juice,asparagus,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,bramble,...,tomatoes,toothpaste,turkey,vegetables mix,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [12]:
# Column sums: how many transactions contain each item.
df.sum()

almonds              27
antioxydant juice    13
asparagus            10
avocado              14
babies food           4
                     ..
whole weat flour      7
whole wheat pasta    10
whole wheat rice     12
yams                 11
yogurt cake          25
Length: 113, dtype: int64

In [14]:
# Single-item support: share of transactions that contain each item,
# ordered from the most to the least frequent item.
first = (
    (df.sum() / df.shape[0])
    .to_frame(name="Support")
    .sort_values(by="Support", ascending=False)
)
first

Unnamed: 0,Support
almonds,0.529412
yogurt cake,0.490196
avocado,0.274510
antioxydant juice,0.254902
shallot,0.254902
...,...
ground beef,0.019608
hand protein bar,0.019608
candy bars,0.000000
fresh bread,0.000000


In [15]:
# Minimum-support threshold used by every later cell: the mean of the
# single-item supports. `first` holds only the "Support" column, so column 0
# of its array form is that column.
arr = first.to_numpy()
supp = arr[:, 0].mean()  # vectorised mean; the result is a numpy.float64 scalar
print(type(supp))
supp

<class 'numpy.float64'>


0.12233211868818311

In [16]:
# Items whose support is at least the mean-support threshold.
first.loc[first["Support"] >= supp]

Unnamed: 0,Support
almonds,0.529412
yogurt cake,0.490196
avocado,0.27451
antioxydant juice,0.254902
shallot,0.254902
vegetables mix,0.254902
tomato juice,0.254902
tomato sauce,0.235294
whole wheat rice,0.235294
escalope,0.235294


In [17]:
def ar_iterations(data, num_iter = 1, support_value = supp, iterationIndex = None):
    """Return the itemsets of size ``num_iter`` whose support exceeds ``support_value``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per transaction, one column per item. An itemset counts as
        present in a row when the selected columns sum to the number of
        selected columns, so the cells are expected to be 0/1 flags.
    num_iter : int, default 1
        Size of the itemsets to evaluate (1 = single items, 2 = pairs, ...).
    support_value : float
        Exclusive lower bound on support. The default is the notebook-level
        ``supp`` as it was when this cell was executed (Python binds default
        values at definition time).
    iterationIndex : iterable of tuples, optional
        Itemsets kept by the previous pass, e.g. ``previous_result.index``.
        Required when ``num_iter > 2``; ignored otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns ``Support`` and ``length``, sorted by ``Support`` descending.
        Indexed by item label when ``num_iter == 1`` and by item tuple otherwise.

    Raises
    ------
    ValueError
        If ``num_iter`` is smaller than 1.
    TypeError
        If ``num_iter > 2`` and ``iterationIndex`` is not supplied.
    """
    if num_iter < 1:
        raise ValueError("num_iter must be >= 1, got {!r}".format(num_iter))

    n_transactions = data.shape[0]

    def _support_table(candidates):
        # Support of a candidate = fraction of transactions in which every
        # one of its items is flagged.
        supports = []
        for items in candidates:
            selected = data.loc[:, list(items)]
            contains_all = selected.sum(axis=1) == selected.shape[1]
            supports.append(int(contains_all.sum()) / n_transactions)
        # Bind results
        table = pd.DataFrame(supports, columns=["Support"])
        table["index"] = [tuple(items) for items in candidates]
        table["length"] = table["index"].apply(len)
        table = table.set_index("index").sort_values("Support", ascending=False)
        # Elimination by support value (strictly greater than the threshold)
        return table[table["Support"] > support_value]

    # First iteration: support of every single item.
    first = (
        (data.sum(axis=0) / n_transactions)
        .to_frame(name="Support")
        .sort_values("Support", ascending=False)
    )
    # .assign returns a new frame, so the filtered slice is never written to
    # in place (avoids SettingWithCopyWarning).
    first = first[first["Support"] > support_value].assign(length=1)

    if num_iter == 1:
        return first

    if num_iter == 2:
        # Second iteration: every pair of the items that survived pass one.
        candidates = itertools.combinations(first.index, 2)
    else:
        # All iterations > 2: combine the items seen in the previous pass.
        if iterationIndex is None:
            raise TypeError(
                "iterationIndex is required when num_iter > 2 "
                "(pass the index of the previous iteration's result)"
            )
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the item order inside each tuple vary between kernel
        # restarts because string hashing is randomised per process.
        items = dict.fromkeys(itertools.chain.from_iterable(iterationIndex))
        candidates = itertools.combinations(items, num_iter)

    return _support_table(list(candidates))

In [18]:
# Pass 1: single items whose support exceeds the mean-support threshold.
iteration1 = ar_iterations(df, num_iter=1, support_value=supp)
iteration1

Unnamed: 0,Support,length
almonds,0.529412,1
yogurt cake,0.490196,1
avocado,0.27451,1
antioxydant juice,0.254902,1
shallot,0.254902,1
vegetables mix,0.254902,1
tomato juice,0.254902,1
tomato sauce,0.235294,1
whole wheat rice,0.235294,1
escalope,0.235294,1


In [19]:
# Pass 2: pairs built from the single items that pass the threshold.
iteration2 = ar_iterations(df, num_iter=2, support_value=supp)
iteration2

Unnamed: 0_level_0,Support,length
index,Unnamed: 1_level_1,Unnamed: 2_level_1
"(almonds, yogurt cake)",0.235294,2
"(yogurt cake, avocado)",0.215686,2
"(almonds, shallot)",0.215686,2
"(yogurt cake, escalope)",0.215686,2
"(almonds, tomato sauce)",0.196078,2
"(almonds, whole wheat rice)",0.196078,2
"(almonds, whole wheat pasta)",0.196078,2
"(almonds, red wine)",0.196078,2
"(almonds, yams)",0.176471,2
"(shallot, tomato sauce)",0.176471,2


In [20]:
# Pass 3: triples drawn from the items that appear in the surviving pairs.
iteration3 = ar_iterations(df, num_iter=3, support_value=supp, iterationIndex=iteration2.index)
iteration3

Unnamed: 0_level_0,Support,length
index,Unnamed: 1_level_1,Unnamed: 2_level_1
"(tomato sauce, shallot, almonds)",0.156863,3
"(tomato sauce, yams, almonds)",0.156863,3
"(almonds, escalope, yogurt cake)",0.137255,3
"(mushroom cream sauce, almonds, light mayo)",0.137255,3
"(whole wheat rice, almonds, red wine)",0.137255,3
"(sandwich, almonds, escalope)",0.137255,3
"(sandwich, almonds, yogurt cake)",0.137255,3
"(sandwich, escalope, yogurt cake)",0.137255,3
"(escalope, shrimp, yogurt cake)",0.137255,3
"(vegetables mix, shallot, almonds)",0.137255,3


In [21]:
# Pass 4: quadruples drawn from the items that appear in the surviving triples.
iteration4 = ar_iterations(df, num_iter=4, support_value=supp,iterationIndex=iteration3.index)
iteration4

Unnamed: 0_level_0,Support,length
index,Unnamed: 1_level_1,Unnamed: 2_level_1
"(escalope, sandwich, almonds, yogurt cake)",0.137255,4


In [30]:
# Cross-check the hand-rolled passes with a library implementation.
# `apriori` here is mlxtend's: its import comes after the apyori one and
# shadows it. mlxtend expects a boolean one-hot frame; 0/1 integers go through
# a deprecated, slower path, so cast first (identical result for 0/1 data).
freq_items = apriori(df.astype(bool), min_support = supp, use_colnames = True)
freq_items.sort_values("support", ascending = False)

Unnamed: 0,support,itemsets
0,0.529412,(almonds)
50,0.490196,(yogurt cake)
3,0.274510,(avocado)
1,0.254902,(antioxydant juice)
45,0.254902,(vegetables mix)
...,...,...
75,0.137255,"(cake, escalope)"
77,0.137255,"(sandwich, escalope)"
52,0.137255,"(burger sauce, almonds)"
80,0.137255,"(mushroom cream sauce, light mayo)"


In [34]:
# Derive association rules from the frequent itemsets. A confidence threshold
# of 0.0 should keep every candidate rule; the support/confidence filtering is
# done in the following cell.
df_ar = association_rules(freq_items, metric = "confidence", min_threshold = 0.0)
df_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(almonds),(avocado),0.529412,0.274510,0.156863,0.296296,1.079365,0.011534,1.030960
1,(avocado),(almonds),0.274510,0.529412,0.156863,0.571429,1.079365,0.011534,1.098039
2,(burger sauce),(almonds),0.196078,0.529412,0.137255,0.700000,1.322222,0.033449,1.568627
3,(almonds),(burger sauce),0.529412,0.196078,0.137255,0.259259,1.322222,0.033449,1.085294
4,(champagne),(almonds),0.156863,0.529412,0.137255,0.875000,1.652778,0.054210,3.764706
...,...,...,...,...,...,...,...,...,...
151,"(almonds, yogurt cake)","(sandwich, escalope)",0.235294,0.137255,0.137255,0.583333,4.250000,0.104960,2.070588
152,(escalope),"(sandwich, almonds, yogurt cake)",0.235294,0.137255,0.137255,0.583333,4.250000,0.104960,2.070588
153,(sandwich),"(almonds, escalope, yogurt cake)",0.196078,0.137255,0.137255,0.700000,5.100000,0.110342,2.875817
154,(almonds),"(sandwich, escalope, yogurt cake)",0.529412,0.137255,0.137255,0.259259,1.888889,0.064591,1.164706


In [28]:
# Rules above the support threshold with confidence over 0.5, most confident first.
df_ar.loc[
    (df_ar["support"] > supp) & (df_ar["confidence"] > 0.5)
].sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
143,"(sandwich, escalope, yogurt cake)",(almonds),0.137255,0.529412,0.137255,1.000000,1.888889,0.064591,inf
124,"(yams, tomato sauce)",(almonds),0.156863,0.529412,0.156863,1.000000,1.888889,0.073818,inf
48,(cake),(escalope),0.137255,0.235294,0.137255,1.000000,4.250000,0.104960,inf
149,"(sandwich, almonds)","(escalope, yogurt cake)",0.137255,0.215686,0.137255,1.000000,4.636364,0.107651,inf
118,"(shallot, vegetables mix)",(almonds),0.137255,0.529412,0.137255,1.000000,1.888889,0.064591,inf
...,...,...,...,...,...,...,...,...,...
122,(vegetables mix),"(shallot, almonds)",0.254902,0.215686,0.137255,0.538462,2.496503,0.082276,1.699346
64,(tomato juice),(salt),0.254902,0.156863,0.137255,0.538462,3.432692,0.097270,1.826797
121,(shallot),"(vegetables mix, almonds)",0.254902,0.156863,0.137255,0.538462,3.432692,0.097270,1.826797
71,(vegetables mix),(shallot),0.254902,0.254902,0.137255,0.538462,2.112426,0.072280,1.614379
