# Aripori Association Mining Test

## Imports

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori, association_rules


## Data Exploration

In [4]:
# df = pd.read_csv('store_data.csv')
df = pd.read_excel('online_retail_uci.xlsx')
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [None]:
data.columns

In [None]:
data.Country.unique()

## Data Cleanup

In [None]:
df['Description'] = df['Description'].str.strip()

data.dropna(axis = 0, subset =['InvoiceNo'], inplace = True)

data['InvoiceNo'] = data['InvoiceNo'].astype('str')

# Shopping was done on credit
data = data[~data['InvoiceNo'].str.contains('C')]


In [11]:

# Transactions done in France
basket_France = (df[df['Country'] =="France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))
  
# Transactions done in the United Kingdom
basket_UK = (df[df['Country'] =="United Kingdom"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))

# Transactions done in Malta
basket_MT = (df[df['Country'] =="Malta"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))

### One Hot encoding

In [12]:
def hot_encode(x):
    if(x<= 0):
        return 0
    if(x>= 1):
        return 1

In [13]:
basket_encoded = basket_France.applymap(hot_encode)
basket_France = basket_encoded

basket_encoded = basket_UK.applymap(hot_encode)
basket_UK = basket_encoded

basket_encoded = basket_MT.applymap(hot_encode)
basket_MT = basket_encoded

## Model Building

In [14]:
freq = apriori(basket_France, min_support=0.05,use_colnames= True)

fr_rules = association_rules(freq,metric='lift',min_threshold=1)
fr_rules = fr_rules.sort_values(['confidence', 'lift'], ascending =[False, False])
fr_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
24,(JUMBO BAG WOODLAND ANIMALS),(POSTAGE),0.065076,0.650759,0.065076,1.0,1.536667,0.022727,inf
184,"(SET/20 RED RETROSPOT PAPER NAPKINS , SET/6 RE...",(SET/6 RED SPOTTY PAPER PLATES),0.086768,0.10846,0.084599,0.975,8.9895,0.075188,35.661605
185,"(SET/20 RED RETROSPOT PAPER NAPKINS , SET/6 RE...",(SET/6 RED SPOTTY PAPER CUPS),0.086768,0.117137,0.084599,0.975,8.323611,0.074435,35.314534
191,"(SET/20 RED RETROSPOT PAPER NAPKINS , SET/6 RE...",(SET/6 RED SPOTTY PAPER PLATES),0.071584,0.10846,0.069414,0.969697,8.940606,0.06165,29.420824
192,"(SET/20 RED RETROSPOT PAPER NAPKINS , SET/6 RE...",(SET/6 RED SPOTTY PAPER CUPS),0.071584,0.117137,0.069414,0.969697,8.278339,0.061029,29.13449


In [24]:
freq_mt = apriori(basket_MT, min_support=0.105,use_colnames= True)

mt_rules = association_rules(freq_mt,metric='lift',min_threshold=1)
mt_rules = mt_rules.sort_values(['confidence', 'lift'], ascending =[False, False])
mt_rules.head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(CHOC TRUFFLE GOLD TRINKET POT ),(CERAMIC CAKE STAND + HANGING CAKES),0.2,0.2,0.2,1.0,5.0,0.16,inf
1,(CERAMIC CAKE STAND + HANGING CAKES),(CHOC TRUFFLE GOLD TRINKET POT ),0.2,0.2,0.2,1.0,5.0,0.16,inf
4,(PHOTO FRAME 3 CLASSIC HANGING),(CERAMIC CAKE STAND + HANGING CAKES),0.2,0.2,0.2,1.0,5.0,0.16,inf
5,(CERAMIC CAKE STAND + HANGING CAKES),(PHOTO FRAME 3 CLASSIC HANGING),0.2,0.2,0.2,1.0,5.0,0.16,inf
8,(CERAMIC CAKE STAND + HANGING CAKES),(SET/3 VANILLA SCENTED CANDLE IN BOX),0.2,0.2,0.2,1.0,5.0,0.16,inf
9,(SET/3 VANILLA SCENTED CANDLE IN BOX),(CERAMIC CAKE STAND + HANGING CAKES),0.2,0.2,0.2,1.0,5.0,0.16,inf
10,(CERAMIC CAKE STAND + HANGING CAKES),(TRIPLE PHOTO FRAME CORNICE ),0.2,0.2,0.2,1.0,5.0,0.16,inf
11,(TRIPLE PHOTO FRAME CORNICE ),(CERAMIC CAKE STAND + HANGING CAKES),0.2,0.2,0.2,1.0,5.0,0.16,inf
14,(CHOC TRUFFLE GOLD TRINKET POT ),(PHOTO FRAME 3 CLASSIC HANGING),0.2,0.2,0.2,1.0,5.0,0.16,inf
15,(PHOTO FRAME 3 CLASSIC HANGING),(CHOC TRUFFLE GOLD TRINKET POT ),0.2,0.2,0.2,1.0,5.0,0.16,inf
