# Association Rules.

### Task-1 Data Preprocessing :


In [4]:
# import the required libraries.
import pandas as pd
import warnings

In [16]:
# Suppress warnings, then read the transactions CSV into `data`.
warnings.simplefilter(action='ignore')
data = pd.read_csv(filepath_or_buffer='Online retail.csv')


In [18]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,Products
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."


In [20]:
# Display the loaded frame (pandas abbreviates long frames in the rendered output).
data

Unnamed: 0,Products
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."
...,...
7496,"butter,light mayo,fresh bread"
7497,"burgers,frozen vegetables,eggs,french fries,ma..."
7498,chicken
7499,"escalope,green tea"


In [22]:
# Handle missing values: discard every row that contains one.
# `data` is modified in place.
data.dropna(axis=0, how='any', inplace=True)

In [24]:
# Preview the first ten rows after dropping missing values.
data.head(n=10)

Unnamed: 0,Products
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."
5,low fat yogurt
6,"whole wheat pasta,french fries"
7,"soup,light cream,shallot"
8,"frozen vegetables,spaghetti,green tea"
9,french fries


In [26]:
# Remove rows that exactly repeat an earlier row (the first occurrence is
# kept); `data` is modified in place.
# NOTE(review): each row appears to be one shopping basket, so identical rows
# may be genuine repeat transactions. Dropping them changes the denominators
# of the support/confidence values computed later — confirm this is intended.
data.drop_duplicates(subset=None, keep='first', inplace=True)

In [28]:
# Preview the first twenty rows after de-duplication.
data.head(n=20)

Unnamed: 0,Products
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."
5,low fat yogurt
6,"whole wheat pasta,french fries"
7,"soup,light cream,shallot"
8,"frozen vegetables,spaghetti,green tea"
9,french fries


#### Converting the Data into an Appropriate Format

In [30]:
# Rename the frame's only column to 'Items'. Assigning a one-element list
# requires `data` to have exactly one column at this point.
data.columns=['Items']

In [32]:
# Split each comma-separated transaction string into a list of item names.
# Every token is stripped first and only then tested, so empty or
# whitespace-only tokens (e.g. from "a, ,b" or a trailing comma) are dropped
# instead of surviving as '' items that would become a bogus column in the
# one-hot encoding.
trans = data['Items'].str.split(',').apply(
    lambda tokens: [name for name in (tok.strip() for tok in tokens) if name]
)

In [34]:
# One-hot encode the transactions: one boolean column per distinct item,
# one row per transaction.
from mlxtend.preprocessing import TransactionEncoder

Te = TransactionEncoder()
data_Te = Te.fit_transform(trans)
df = pd.DataFrame(data=data_Te, columns=Te.columns_)

In [36]:
# Preview the first twenty rows of `data` under its renamed 'Items' column.
data.head(n=20)

Unnamed: 0,Items
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."
5,low fat yogurt
6,"whole wheat pasta,french fries"
7,"soup,light cream,shallot"
8,"frozen vegetables,spaghetti,green tea"
9,french fries


In [38]:
# Preview the first ten rows of the one-hot encoded frame.
df.head(n=10)

Unnamed: 0,almonds,antioxydant juice,asparagus,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,body spray,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,True,True,False,True,False,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,True,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Task-2 Association Rule Mining :


#### 1. Implementing the Apriori algorithm in Python with libraries such as pandas and mlxtend

In [42]:
# Import required libraries
from mlxtend.frequent_patterns import apriori,association_rules

In [44]:
# Apply the Apriori algorithm to find frequent itemsets: keep itemsets whose
# support is at least 0.01 and label them by item name.
freq_itemsets = apriori(
    df,
    min_support=0.01,
    use_colnames=True,
)

In [46]:
# Preview the first ten frequent itemsets with their support.
freq_itemsets.head(n=10)

Unnamed: 0,support,itemsets
0,0.029366,(almonds)
1,0.011206,(antioxydant juice)
2,0.045981,(avocado)
3,0.012558,(bacon)
4,0.015456,(barbecue sauce)
5,0.020479,(black tea)
6,0.013138,(blueberries)
7,0.016229,(body spray)
8,0.045015,(brownies)
9,0.012365,(bug spray)


In [48]:
# Generate association rules from the frequent itemsets, keeping rules whose
# confidence reaches the 0.2 threshold.
rules = association_rules(
    freq_itemsets,
    min_threshold=0.2,
    metric="confidence",
)

In [50]:
# Keep only the rules whose lift is strictly greater than 1.0.
rules = rules.loc[rules['lift'] > 1.0, :]

In [52]:
# Show only the headline metrics for each retained rule.
rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(almonds),(mineral water),0.011012,0.375000,1.250644
1,(avocado),(chocolate),0.010240,0.222689,1.085347
2,(avocado),(french fries),0.011592,0.252101,1.308800
3,(avocado),(milk),0.010819,0.235294,1.383957
4,(avocado),(mineral water),0.016036,0.348739,1.163064
...,...,...,...,...,...
351,"(shrimp, mineral water)",(spaghetti),0.012365,0.367816,1.602539
352,"(soup, spaghetti)",(mineral water),0.010819,0.523364,1.745448
353,"(soup, mineral water)",(spaghetti),0.010819,0.323699,1.410327
354,"(spaghetti, tomatoes)",(mineral water),0.013524,0.451613,1.506152


In [54]:
# Show the filtered rules with every metric column (pandas abbreviates the middle rows).
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(almonds),(mineral water),0.029366,0.299845,0.011012,0.375000,1.250644,1.0,0.002207,1.120247,0.206476,0.034608,0.107340,0.205863
1,(avocado),(chocolate),0.045981,0.205178,0.010240,0.222689,1.085347,1.0,0.000805,1.022528,0.082426,0.042502,0.022032,0.136297
2,(avocado),(french fries),0.045981,0.192620,0.011592,0.252101,1.308800,1.0,0.002735,1.079531,0.247313,0.051064,0.073672,0.156141
3,(avocado),(milk),0.045981,0.170015,0.010819,0.235294,1.383957,1.0,0.003002,1.085364,0.290806,0.052731,0.078650,0.149465
4,(avocado),(mineral water),0.045981,0.299845,0.016036,0.348739,1.163064,1.0,0.002248,1.075076,0.146960,0.048623,0.069833,0.201109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,"(shrimp, mineral water)",(spaghetti),0.033617,0.229521,0.012365,0.367816,1.602539,1.0,0.004649,1.218758,0.389069,0.049307,0.179493,0.210844
352,"(soup, spaghetti)",(mineral water),0.020672,0.299845,0.010819,0.523364,1.745448,1.0,0.004621,1.468952,0.436096,0.034934,0.319242,0.279723
353,"(soup, mineral water)",(spaghetti),0.033423,0.229521,0.010819,0.323699,1.410327,1.0,0.003148,1.139255,0.301005,0.042912,0.122234,0.185419
354,"(spaghetti, tomatoes)",(mineral water),0.029946,0.299845,0.013524,0.451613,1.506152,1.0,0.004545,1.276752,0.346431,0.042761,0.216763,0.248358


In [56]:
# Order the rules from highest to lowest lift; `rules` itself is left unsorted.
rules_sort = rules.sort_values('lift', ascending=False)

In [58]:
# Headline metrics for the five highest-lift rules.
rules_sort.head(n=5)[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
177,(whole wheat pasta),(olive oil),0.011012,0.271429,3.094525
127,(herb & pepper),(ground beef),0.022798,0.343023,2.525588
315,"(shrimp, mineral water)",(frozen vegetables),0.010433,0.310345,2.390394
305,"(frozen vegetables, spaghetti)",(ground beef),0.012558,0.321782,2.369196
345,"(spaghetti, milk)",(olive oil),0.01024,0.204633,2.332999


In [60]:
# Show the rules frame again; `rules` has not been reassigned since the earlier
# display (the sort above produced a separate `rules_sort`), so this repeats it.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(almonds),(mineral water),0.029366,0.299845,0.011012,0.375000,1.250644,1.0,0.002207,1.120247,0.206476,0.034608,0.107340,0.205863
1,(avocado),(chocolate),0.045981,0.205178,0.010240,0.222689,1.085347,1.0,0.000805,1.022528,0.082426,0.042502,0.022032,0.136297
2,(avocado),(french fries),0.045981,0.192620,0.011592,0.252101,1.308800,1.0,0.002735,1.079531,0.247313,0.051064,0.073672,0.156141
3,(avocado),(milk),0.045981,0.170015,0.010819,0.235294,1.383957,1.0,0.003002,1.085364,0.290806,0.052731,0.078650,0.149465
4,(avocado),(mineral water),0.045981,0.299845,0.016036,0.348739,1.163064,1.0,0.002248,1.075076,0.146960,0.048623,0.069833,0.201109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,"(shrimp, mineral water)",(spaghetti),0.033617,0.229521,0.012365,0.367816,1.602539,1.0,0.004649,1.218758,0.389069,0.049307,0.179493,0.210844
352,"(soup, spaghetti)",(mineral water),0.020672,0.299845,0.010819,0.523364,1.745448,1.0,0.004621,1.468952,0.436096,0.034934,0.319242,0.279723
353,"(soup, mineral water)",(spaghetti),0.033423,0.229521,0.010819,0.323699,1.410327,1.0,0.003148,1.139255,0.301005,0.042912,0.122234,0.185419
354,"(spaghetti, tomatoes)",(mineral water),0.029946,0.299845,0.013524,0.451613,1.506152,1.0,0.004545,1.276752,0.346431,0.042761,0.216763,0.248358


### Task-3 Analysis and Interpretation :

In [62]:
# Print the ten highest-lift rules as plain text.
# NOTE(review): print() emits the wrapped text repr; ending the cell with the
# bare expression would render the usual table instead.
print(rules_sort.head(n=10))

                            antecedents          consequents  \
177                 (whole wheat pasta)          (olive oil)   
127                     (herb & pepper)        (ground beef)   
315             (shrimp, mineral water)  (frozen vegetables)   
305      (frozen vegetables, spaghetti)        (ground beef)   
345                   (spaghetti, milk)          (olive oil)   
304    (frozen vegetables, ground beef)          (spaghetti)   
339               (soup, mineral water)               (milk)   
189                (french fries, eggs)            (burgers)   
331          (spaghetti, mineral water)        (ground beef)   
314  (frozen vegetables, mineral water)             (shrimp)   

     antecedent support  consequent support   support  confidence      lift  \
177            0.040572            0.087713  0.011012    0.271429  3.094525   
127            0.066461            0.135819  0.022798    0.343023  2.525588   
315            0.033617            0.129830  0.010433    0