In [None]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Load the Superstore sales extract from the working directory.
data = pd.read_csv(filepath_or_buffer="SampleSuperstore.csv")
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Ship Mode,Segment,Country,City,State,Postal Code,Region,Category,Sub-Category,Sales,Quantity,Discount,Profit
0,Second Class,Consumer,United States,Henderson,Kentucky,42420,South,Furniture,Bookcases,261.96,2,0.0,41.9136
1,Second Class,Consumer,United States,Henderson,Kentucky,42420,South,Furniture,Chairs,731.94,3,0.0,219.582
2,Second Class,Corporate,United States,Los Angeles,California,90036,West,Office Supplies,Labels,14.62,2,0.0,6.8714
3,Standard Class,Consumer,United States,Fort Lauderdale,Florida,33311,South,Furniture,Tables,957.5775,5,0.45,-383.031
4,Standard Class,Consumer,United States,Fort Lauderdale,Florida,33311,South,Office Supplies,Storage,22.368,2,0.2,2.5164


In [None]:
# Show the column labels of the loaded frame.
data.columns

Index(['Ship Mode', 'Segment', 'Country', 'City', 'State', 'Postal Code',
       'Region', 'Category', 'Sub-Category', 'Sales', 'Quantity', 'Discount',
       'Profit'],
      dtype='object')

In [None]:
# Distinct values of the State column, in order of first appearance.
data["State"].unique()

array(['Kentucky', 'California', 'Florida', 'North Carolina',
       'Washington', 'Texas', 'Wisconsin', 'Utah', 'Nebraska',
       'Pennsylvania', 'Illinois', 'Minnesota', 'Michigan', 'Delaware',
       'Indiana', 'New York', 'Arizona', 'Virginia', 'Tennessee',
       'Alabama', 'South Carolina', 'Oregon', 'Colorado', 'Iowa', 'Ohio',
       'Missouri', 'Oklahoma', 'New Mexico', 'Louisiana', 'Connecticut',
       'New Jersey', 'Massachusetts', 'Georgia', 'Nevada', 'Rhode Island',
       'Mississippi', 'Arkansas', 'Montana'], dtype=object)

In [None]:
# Florida basket: total Quantity per (Ship Mode, Sub-Category), pivoted so that
# each Ship Mode is a row and each Sub-Category a column; combinations that
# never occur are filled with 0.
# NOTE(review): the rows here are ship modes, not individual orders, so the
# basket has at most one row per distinct Ship Mode - confirm this is intended.
basket_Florida = (
    data.loc[data["State"].eq("Florida")]
    .groupby(by=["Ship Mode", "Sub-Category"])["Quantity"]
    .sum()
    .unstack(level="Sub-Category")
    .reset_index()
    .fillna(value=0)
    .set_index(keys="Ship Mode")
)

In [None]:
# Washington basket: total Quantity per (Ship Mode, Sub-Category), pivoted so
# that each Ship Mode is a row and each Sub-Category a column; combinations
# that never occur are filled with 0.
# NOTE(review): the rows here are ship modes, not individual orders, so the
# basket has at most one row per distinct Ship Mode - confirm this is intended.
basket_Washington = (
    data.loc[data["State"].eq("Washington")]
    .groupby(by=["Ship Mode", "Sub-Category"])["Quantity"]
    .sum()
    .unstack(level="Sub-Category")
    .reset_index()
    .fillna(value=0)
    .set_index(keys="Ship Mode")
)

In [None]:
# Massachusetts basket: total Quantity per (Ship Mode, Sub-Category), pivoted
# so that each Ship Mode is a row and each Sub-Category a column; combinations
# that never occur are filled with 0.
# NOTE(review): the rows here are ship modes, not individual orders, so the
# basket has at most one row per distinct Ship Mode - confirm this is intended.
basket_Massachusetts = (
    data.loc[data["State"].eq("Massachusetts")]
    .groupby(by=["Ship Mode", "Sub-Category"])["Quantity"]
    .sum()
    .unstack(level="Sub-Category")
    .reset_index()
    .fillna(value=0)
    .set_index(keys="Ship Mode")
)

In [None]:
# Arizona basket: total Quantity per (Ship Mode, Sub-Category), pivoted so that
# each Ship Mode is a row and each Sub-Category a column; combinations that
# never occur are filled with 0.
# NOTE(review): the rows here are ship modes, not individual orders, so the
# basket has at most one row per distinct Ship Mode - confirm this is intended.
basket_Arizona = (
    data.loc[data["State"].eq("Arizona")]
    .groupby(by=["Ship Mode", "Sub-Category"])["Quantity"]
    .sum()
    .unstack(level="Sub-Category")
    .reset_index()
    .fillna(value=0)
    .set_index(keys="Ship Mode")
)

In [None]:
def hot_encode(x):
  """Collapse a quantity total into a 0/1 presence flag.

  Args:
    x: Numeric cell value (a summed quantity).

  Returns:
    1 when ``x`` is at least 1, otherwise 0.
  """
  # A single total expression: the earlier pair of independent `if`s fell
  # through and returned None for values strictly between 0 and 1 and for NaN
  # (every comparison with NaN is False), which put None cells in the basket.
  return 1 if x >= 1 else 0

In [None]:
# Binarise each basket: a cell becomes 1 when at least one unit was sold for
# that (Ship Mode, Sub-Category) pair and 0 otherwise.
# The vectorised comparison replaces DataFrame.applymap(hot_encode), which is
# deprecated in pandas >= 2.1 and calls Python once per cell; the result is the
# same 0/1 integer frame.
basket_Washington = basket_Washington.ge(1).astype(int)
basket_Massachusetts = basket_Massachusetts.ge(1).astype(int)
basket_Florida = basket_Florida.ge(1).astype(int)
basket_Arizona = basket_Arizona.ge(1).astype(int)

# The scratch name still ends up bound to the encoded Arizona basket, as it
# did before, in case a later cell reads it.
basket_encoded = basket_Arizona

In [None]:
# Washington: mine frequent itemsets (min_support 0.01), build association
# rules using lift with a threshold of 1, and rank them by confidence and then
# lift, both descending.
frq_items = apriori(basket_Washington, use_colnames=True, min_support=0.01)
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=["confidence", "lift"], ascending=False)
)
# Top five rules.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
22,(Art),(Appliances),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
23,(Appliances),(Art),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
26,(Bookcases),(Appliances),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
27,(Appliances),(Bookcases),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
30,(Paper),(Appliances),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf


In [None]:
# Massachusetts: mine frequent itemsets (min_support 0.01), build association
# rules using lift with a threshold of 1, and rank them by confidence and then
# lift, both descending.
frq_items = apriori(basket_Massachusetts, use_colnames=True, min_support=0.01)
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=["confidence", "lift"], ascending=False)
)
# Top five rules.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Art),(Fasteners),0.5,0.5,0.5,1.0,2.0,0.25,inf
1,(Fasteners),(Art),0.5,0.5,0.5,1.0,2.0,0.25,inf
2,(Art),(Furnishings),0.5,0.5,0.5,1.0,2.0,0.25,inf
3,(Furnishings),(Art),0.5,0.5,0.5,1.0,2.0,0.25,inf
4,(Art),(Paper),0.5,0.5,0.5,1.0,2.0,0.25,inf


In [None]:
# Florida: mine frequent itemsets (min_support 0.01), build association rules
# using lift with a threshold of 1, and rank them by confidence and then lift,
# both descending.
frq_items = apriori(basket_Florida, use_colnames=True, min_support=0.01)
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=["confidence", "lift"], ascending=False)
)
# Top five rules.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
24,(Appliances),(Labels),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
25,(Labels),(Appliances),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
44,(Chairs),(Envelopes),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
45,(Envelopes),(Chairs),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf
50,(Phones),(Chairs),0.333333,0.333333,0.333333,1.0,3.0,0.222222,inf


In [None]:
# Arizona: mine frequent itemsets (min_support 0.01), build association rules
# using lift with a threshold of 1, and rank them by confidence and then lift,
# both descending.
frq_items = apriori(basket_Arizona, use_colnames=True, min_support=0.01)
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=["confidence", "lift"], ascending=False)
)
# Top five rules.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(Accessories),(Art),0.5,0.5,0.5,1.0,2.0,0.25,inf
3,(Art),(Accessories),0.5,0.5,0.5,1.0,2.0,0.25,inf
4,(Binders),(Accessories),0.5,0.5,0.5,1.0,2.0,0.25,inf
5,(Accessories),(Binders),0.5,0.5,0.5,1.0,2.0,0.25,inf
6,(Accessories),(Fasteners),0.5,0.5,0.5,1.0,2.0,0.25,inf
