# Checkpoint Objective
Let's say you are a Machine Learning engineer working for a clothing company and you want to adopt new strategies  to improve the company profit .

Instructions
Use this dataset and the association rules mining to find new marketing plans . 

Note here that one of the strategies can be based on which items should be put together

dataset =[['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],

         ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat'],

         ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],

         ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],

         ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants']]

Bonus: try to do some visualization before applying the Apriori algorithm.

In [1]:
# 1. Apriori preparation
# Import necessary libraries
import numpy as np
import pandas as pd
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Compose dataset
dataset = [['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],
           ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat'],
           ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],
           ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],
           ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants']]

# Apply one-hot-encoding on our dataset
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
dt = pd.DataFrame(te_ary, columns = te.columns_)

In [2]:
# Visualize the dataset
dt

Unnamed: 0,Dress,Earrings,Hat,Heels,Necklace,Pants,Sandals,Scarf,Shirt,Skirt,Sneakers,Sunglasses
0,False,False,True,False,False,True,False,True,False,True,True,False
1,False,False,True,False,False,True,False,False,False,True,True,True
2,True,False,False,True,False,True,True,True,False,False,False,False
3,True,True,True,True,True,False,False,True,False,False,False,False
4,False,True,False,False,False,True,False,True,True,True,False,False


In [3]:
# Apply the Apriori algorithm to find frequent itemsets with a minimum support of 0.5
frequent_itemsets_index = apriori(dt, min_support=0.5)

# Using column names instead of column indexes
frequent_itemsets_name=apriori(dt, min_support=0.5, use_colnames=True)

# Combine the 2 dataframes
frequent_itemsets = pd.concat([frequent_itemsets_index, frequent_itemsets_name], axis=1)
frequent_itemsets.iloc[:, [0, 1, 3]]

Unnamed: 0,support,itemsets,itemsets.1
0,0.6,(2),(Hat)
1,0.8,(5),(Pants)
2,0.8,(7),(Scarf)
3,0.6,(9),(Skirt)
4,0.6,"(5, 7)","(Pants, Scarf)"
5,0.6,"(9, 5)","(Pants, Skirt)"


In [4]:
# Find association rules with minimum confidence of 0.5
association_rules_dt = association_rules(frequent_itemsets_name, metric="confidence", min_threshold=0.5)
association_rules_dt

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Pants),(Scarf),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
1,(Scarf),(Pants),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
2,(Pants),(Skirt),0.8,0.6,0.6,0.75,1.25,0.12,1.6
3,(Skirt),(Pants),0.6,0.8,0.6,1.0,1.25,0.12,inf


# Let's do the same checkpoint but with a bigger dataset !

In [5]:
# Load the data
data = pd.read_csv('25.Market_Basket_Optimisation_AmaniYch.ipynb.csv')
data.head()

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


In [6]:
# Remove missing values and put the remaining values in a series
data = data.apply(lambda row: row.dropna().tolist(), axis=1).tolist()

In [7]:
# Encode the data
te_ary = te.fit(data).transform(data)
dt = pd.DataFrame(te_ary, columns = te.columns_)
dt.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [8]:
# Apply the Apriori algorithm to find frequent itemsets with a minimum support of 0.05
frequent_itemsets_index = apriori(dt, min_support=0.05)

# Using column names instead of column indexes
frequent_itemsets_name=apriori(dt, min_support=0.05, use_colnames=True)

# Combine the 2 dataframes
frequent_itemsets = pd.concat([frequent_itemsets_index, frequent_itemsets_name], axis=1)
frequent_itemsets.iloc[:, [0, 1, 3]]

Unnamed: 0,support,itemsets,itemsets.1
0,0.0872,(15),(burgers)
1,0.081067,(17),(cake)
2,0.06,(23),(chicken)
3,0.163867,(25),(chocolate)
4,0.0804,(30),(cookies)
5,0.051067,(31),(cooking oil)
6,0.179733,(37),(eggs)
7,0.079333,(40),(escalope)
8,0.170933,(43),(french fries)
9,0.0632,(48),(frozen smoothie)


In [9]:
# Find association rules with minimum confidence of 0.05
association_rules_dt = association_rules(frequent_itemsets_name, metric="confidence", min_threshold=0.05)
association_rules_dt

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(mineral water),(chocolate),0.238267,0.163867,0.052667,0.221041,1.348907,0.013623,1.073398
1,(chocolate),(mineral water),0.163867,0.238267,0.052667,0.3214,1.348907,0.013623,1.122506
2,(mineral water),(eggs),0.238267,0.179733,0.050933,0.213766,1.189351,0.008109,1.043286
3,(eggs),(mineral water),0.179733,0.238267,0.050933,0.283383,1.189351,0.008109,1.062957
4,(mineral water),(spaghetti),0.238267,0.174133,0.059733,0.250699,1.439698,0.018243,1.102184
5,(spaghetti),(mineral water),0.174133,0.238267,0.059733,0.343032,1.439698,0.018243,1.159468
