In [1]:
import pandas as pd
import numpy as np

In [2]:
dataset = [
    ['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],
    ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat'],
    ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],
    ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],
    ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants']
    ]

#**Data Preparation**

In [3]:
from mlxtend.preprocessing import TransactionEncoder

In [4]:
te = TransactionEncoder()
te_array = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_array, columns=te.columns_)

In [5]:
df

Unnamed: 0,Dress,Earrings,Hat,Heels,Necklace,Pants,Sandals,Scarf,Shirt,Skirt,Sneakers,Sunglasses
0,False,False,True,False,False,True,False,True,False,True,True,False
1,False,False,True,False,False,True,False,False,False,True,True,True
2,True,False,False,True,False,True,True,True,False,False,False,False
3,True,True,True,True,True,False,False,True,False,False,False,False
4,False,True,False,False,False,True,False,True,True,True,False,False


#**Some visualization**

In [6]:
import plotly.express as px

In [7]:
for feat in df.columns:
  print(feat)
  fig = px.bar(df, x=feat)
  fig.update_traces(marker=dict(color='red', opacity=0.5, line=dict(color='red', width=0.5)))
  fig.show()

Dress


Earrings


Hat


Heels


Necklace


Pants


Sandals


Scarf


Shirt


Skirt


Sneakers


Sunglasses


#**Apriori Algorithm**

In [8]:
from mlxtend.frequent_patterns import *

In [9]:
frequent_items = apriori(df, min_support=0.6, use_colnames=True)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [10]:
frequent_items


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,support,itemsets
0,0.6,(Hat)
1,0.8,(Pants)
2,0.8,(Scarf)
3,0.6,(Skirt)
4,0.6,"(Scarf, Pants)"
5,0.6,"(Pants, Skirt)"


In [11]:
df_lift = association_rules(frequent_items, metric='lift')
df_lift


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Scarf),(Pants),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
1,(Pants),(Scarf),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
2,(Pants),(Skirt),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0
3,(Skirt),(Pants),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5


**The items that should be put together are :**

**✈ *Pants*** WITH ***Skirt***

**✈ *Skirt*** WITH ***Pants***


#**Now let use a big dataset**

##**Import the dataset**

In [12]:
from pydrive.auth import GoogleAuth

from pydrive.drive import GoogleDrive

from google.colab import auth

from oauth2client.client import GoogleCredentials


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [13]:
auth.authenticate_user()

gauth = GoogleAuth()

gauth.credentials = GoogleCredentials.get_application_default()

drive = GoogleDrive(gauth)

file_download = drive.CreateFile({'id':'109dPPJzZbjzMUp3JQBx5rQ538fzMNYzU'})

file_download.GetContentFile('Market_Basket_Optimisation.csv')


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [14]:
big_df = pd.read_csv('Market_Basket_Optimisation.csv')


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [15]:
big_df


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7496,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7497,chicken,,,,,,,,,,,,,,,,,,,
7498,escalope,green tea,,,,,,,,,,,,,,,,,,


In [49]:
big_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7500 entries, 0 to 7499
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   shrimp             7500 non-null   object 
 1   almonds            5746 non-null   object 
 2   avocado            4388 non-null   object 
 3   vegetables mix     3344 non-null   object 
 4   green grapes       2528 non-null   object 
 5   whole weat flour   1863 non-null   object 
 6   yams               1368 non-null   object 
 7   cottage cheese     980 non-null    object 
 8   energy drink       653 non-null    object 
 9   tomato juice       394 non-null    object 
 10  low fat yogurt     255 non-null    object 
 11  green tea          153 non-null    object 
 12  honey              86 non-null     object 
 13  salad              46 non-null     object 
 14  mineral water      24 non-null     object 
 15  salmon             7 non-null      object 
 16  antioxydant juice  3 non


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



##**Data Preprocessing**

In [34]:
datasets = pd.DataFrame({})
for col in big_df.columns:
  n = big_df[col].dropna().values
  datasets[col] = big_df[col].replace(n, col)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [35]:
datasets


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,shrimp,almonds,avocado,,,,,,,,,,,,,,,,,
1,shrimp,,,,,,,,,,,,,,,,,,,
2,shrimp,almonds,,,,,,,,,,,,,,,,,,
3,shrimp,almonds,avocado,vegetables mix,green grapes,,,,,,,,,,,,,,,
4,shrimp,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,shrimp,almonds,avocado,,,,,,,,,,,,,,,,,
7496,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,,,,,,,,,,,,,,
7497,shrimp,,,,,,,,,,,,,,,,,,,
7498,shrimp,almonds,,,,,,,,,,,,,,,,,,


In [36]:
data = []
m, _ = datasets.shape
for row in range(m):
  n = datasets.iloc[row].dropna().values.tolist()
  data.append(n)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [45]:
data


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



[['shrimp', 'almonds', 'avocado'],
 ['shrimp'],
 ['shrimp', 'almonds'],
 ['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes'],
 ['shrimp'],
 ['shrimp', 'almonds'],
 ['shrimp', 'almonds', 'avocado'],
 ['shrimp', 'almonds', 'avocado'],
 ['shrimp'],
 ['shrimp', 'almonds'],
 ['shrimp'],
 ['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes'],
 ['shrimp', 'almonds', 'avocado'],
 ['shrimp', 'almonds'],
 ['shrimp'],
 ['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams'],
 ['shrimp', 'almonds'],
 ['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice'],
 ['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes'],
 ['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams'],
 ['shrimp', 'almonds'],
 ['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  '

##**Define the dataframe**

In [47]:
te = TransactionEncoder()
array = te.fit(data).transform(data)
df2 = pd.DataFrame(array, columns=te.columns_)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [50]:
df2['olive oil'] = False # Because the 'olive oil' are not in any transaction


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [51]:
df2


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,almonds,antioxydant juice,avocado,cottage cheese,energy drink,frozen smoothie,green grapes,green tea,honey,low fat yogurt,mineral water,salad,salmon,shrimp,spinach,tomato juice,vegetables mix,whole weat flour,yams,olive oil
0,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
2,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
3,True,False,True,False,False,False,True,False,False,False,False,False,False,True,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
7496,True,False,True,False,False,False,True,False,False,False,False,False,False,True,False,False,True,True,False,False
7497,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
7498,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False


# **Apriori Algorithm**

In [52]:
frequent_items = apriori(df2, min_support=0.5, use_colnames=True)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [55]:
print(frequent_items)
df_lift = association_rules(frequent_items, metric='lift')

    support                    itemsets
0  0.766133                   (almonds)
1  0.585067                   (avocado)
2  1.000000                    (shrimp)
3  0.585067          (avocado, almonds)
4  0.766133           (shrimp, almonds)
5  0.585067           (shrimp, avocado)
6  0.585067  (shrimp, avocado, almonds)



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [62]:
df_lift


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(avocado),(almonds),0.585067,0.766133,0.585067,1.0,1.305256,0.136828,inf,0.563625
1,(almonds),(avocado),0.766133,0.585067,0.585067,0.763662,1.305256,0.136828,1.755675,1.0
2,(shrimp),(almonds),1.0,0.766133,0.766133,0.766133,1.0,0.0,1.0,0.0
3,(almonds),(shrimp),0.766133,1.0,0.766133,1.0,1.0,0.0,inf,0.0
4,(shrimp),(avocado),1.0,0.585067,0.585067,0.585067,1.0,0.0,1.0,0.0
5,(avocado),(shrimp),0.585067,1.0,0.585067,1.0,1.0,0.0,inf,0.0
6,"(shrimp, avocado)",(almonds),0.585067,0.766133,0.585067,1.0,1.305256,0.136828,inf,0.563625
7,"(shrimp, almonds)",(avocado),0.766133,0.585067,0.585067,0.763662,1.305256,0.136828,1.755675,1.0
8,"(avocado, almonds)",(shrimp),0.585067,1.0,0.585067,1.0,1.0,0.0,inf,0.0
9,(shrimp),"(avocado, almonds)",1.0,0.585067,0.585067,0.585067,1.0,0.0,1.0,0.0


In [63]:
df_lift[df_lift['lift'] > df_lift['lift'].mean()]


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(avocado),(almonds),0.585067,0.766133,0.585067,1.0,1.305256,0.136828,inf,0.563625
1,(almonds),(avocado),0.766133,0.585067,0.585067,0.763662,1.305256,0.136828,1.755675,1.0
6,"(shrimp, avocado)",(almonds),0.585067,0.766133,0.585067,1.0,1.305256,0.136828,inf,0.563625
7,"(shrimp, almonds)",(avocado),0.766133,0.585067,0.585067,0.763662,1.305256,0.136828,1.755675,1.0
10,(avocado),"(shrimp, almonds)",0.585067,0.766133,0.585067,1.0,1.305256,0.136828,inf,0.563625
11,(almonds),"(shrimp, avocado)",0.766133,0.585067,0.585067,0.763662,1.305256,0.136828,1.755675,1.0


**The items that should be put together :**

**✈ *avocado*** WITH ***almonds***

**✈ (*shrimp* + *avocado*)** WITH ***almonds***

**✈ (*shrimp* + *almonds*)** WITH ***avocado***

**✈ *avocado*** WITH **(*shrimp* + *almonds*)**

**✈ *almonds*** WITH **(*shrimp* + *avocado*)**