In [5]:
!pip install apyori



In [6]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from apyori import apriori

#Data Preparation

In [7]:
df = pd.read_excel('./ItemList.xlsx', header=None, index_col=False)

In [8]:
df.shape

(14963, 11)

In [9]:
df.head() 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,sausage,whole milk,semi-finished bread,yogurt,,,,,,,
1,whole milk,pastry,salty snack,,,,,,,,
2,canned beer,misc. beverages,,,,,,,,,
3,sausage,hygiene articles,,,,,,,,,
4,soda,pickled vegetables,,,,,,,,,


In [10]:
print("Total Transactions: {}".format(len(df)))

Total Transactions: 14963


In [123]:
x = df.values
x = [sub[sub == sub].tolist() for sub in x if sub[sub == sub].tolist()]
transactions = x

In [124]:
transactions[0:10]

[['sausage', 'whole milk', 'semi-finished bread', 'yogurt'],
 ['whole milk', 'pastry', 'salty snack'],
 ['canned beer', 'misc. beverages'],
 ['sausage', 'hygiene articles'],
 ['soda', 'pickled vegetables'],
 ['frankfurter', 'curd'],
 ['sausage', 'whole milk', 'rolls/buns'],
 ['whole milk', 'soda'],
 ['beef', 'white bread'],
 ['frankfurter', 'soda', 'whipped/sour cream']]

# Association Rules

In [125]:
rules = apriori(transactions,min_support=0.00030,min_confidance=0.05,min_lift=3,min_length=2,target="rules")
association_results = list(rules)

In [126]:
for item in association_results:

    pair = item[0] 
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    print("Support: " + str(item[1]))

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")

Rule: specialty chocolate -> frozen fish
Support: 0.0003341575887188398
Confidence: 0.049019607843137254
Lift: 3.0689556157190907
Rule: liver loaf -> fruit/vegetable juice
Support: 0.00040098910646260775
Confidence: 0.011787819253438114
Lift: 3.52762278978389
Rule: ham -> pickled vegetables
Support: 0.0005346521419501437
Confidence: 0.03125
Lift: 3.4895055970149254
Rule: meat -> roll products
Support: 0.0003341575887188398
Confidence: 0.019841269841269844
Lift: 3.620547812620984
Rule: misc. beverages -> salt
Support: 0.0003341575887188398
Confidence: 0.0211864406779661
Lift: 3.5619405827461437
Rule: misc. beverages -> spread cheese
Support: 0.0003341575887188398
Confidence: 0.0211864406779661
Lift: 3.170127118644068
Rule: seasonal products -> soups
Support: 0.0003341575887188398
Confidence: 0.04716981132075471
Lift: 14.704205974842766
Rule: sugar -> spread cheese
Support: 0.00040098910646260775
Confidence: 0.06
Lift: 3.3878490566037733
Rule: sausage -> bottled beer
Support: 0.000334157

#Frequently Sold Products

In [127]:
df.apply(pd.value_counts).fillna(0)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Instant food products,12.0,23.0,15.0,9.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
UHT-milk,124.0,128.0,43.0,19.0,2.0,2.0,2.0,3.0,0.0,0.0,0.0
abrasive cleaner,2.0,8.0,6.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
artif. sweetener,6.0,6.0,7.0,4.0,2.0,1.0,0.0,3.0,0.0,0.0,0.0
baby cosmetics,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
white bread,92.0,157.0,63.0,25.0,11.0,4.0,5.0,2.0,3.0,0.0,0.0
white wine,53.0,80.0,25.0,10.0,1.0,2.0,4.0,1.0,0.0,0.0,0.0
whole milk,1083.0,983.0,244.0,111.0,39.0,20.0,7.0,14.0,1.0,0.0,0.0
yogurt,408.0,594.0,166.0,99.0,23.0,15.0,19.0,10.0,0.0,0.0,0.0


In [128]:
products = df.apply(pd.value_counts).fillna(0).sum(axis=1)
print(products)

Instant food products      60.0
UHT-milk                  323.0
abrasive cleaner           22.0
artif. sweetener           29.0
baby cosmetics              3.0
                          ...  
white bread               362.0
white wine                176.0
whole milk               2502.0
yogurt                   1334.0
zwieback                   60.0
Length: 167, dtype: float64


In [129]:
def ditribution_plot(x,y,name=None,xaxis=None,yaxis=None):
    fig = go.Figure([
        go.Bar(x=x, y=y)
    ])

    fig.update_layout(
        title_text=name,
        xaxis_title=xaxis,
        yaxis_title=yaxis
    )
    fig.show()

In [132]:
x = products.sort_values(ascending = False) 
x = x[:15]

ditribution_plot(x=x.index, y=x.values, name="Top 15 frequently sold products", yaxis="Number of item sold", xaxis="Product")

In [133]:
y = products.sort_values(ascending=True) 
y = y[:15]

ditribution_plot(x=y.index, y=y.values, name="15 least frequently sold products", yaxis="Number of item sold", xaxis="Product")