In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("Online retail.csv")                

In [5]:
df

Unnamed: 0,Products
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."
...,...
7496,"butter,light mayo,fresh bread"
7497,"burgers,frozen vegetables,eggs,french fries,ma..."
7498,chicken
7499,"escalope,green tea"


## Data Preprocessing:

In [8]:
df.drop_duplicates(inplace=True)

In [10]:
def txt_split(txt):
    return txt.split(',')

In [12]:
data = df['Products'].apply(txt_split)

In [14]:
data

0       [shrimp, almonds, avocado, vegetables mix, gre...
1                              [burgers, meatballs, eggs]
2                                               [chutney]
3                                       [turkey, avocado]
4       [mineral water, milk, energy bar, whole wheat ...
                              ...                        
7493    [burgers, salmon, pancakes, french fries, froz...
7494    [turkey, burgers, dessert wine, shrimp, pasta,...
7496                    [butter, light mayo, fresh bread]
7497    [burgers, frozen vegetables, eggs, french frie...
7500    [eggs, frozen smoothie, yogurt cake, low fat y...
Name: Products, Length: 5176, dtype: object

In [16]:
data.isna().sum()
# No missing Value

0

In [18]:
from mlxtend.preprocessing import TransactionEncoder

In [20]:
te = TransactionEncoder()
encoded_df = te.fit_transform(data)
encoded_df

array([[False,  True,  True, ...,  True, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False,  True, False]])

In [22]:
te.columns_

[' asparagus',
 'almonds',
 'antioxydant juice',
 'asparagus',
 'avocado',
 'babies food',
 'bacon',
 'barbecue sauce',
 'black tea',
 'blueberries',
 'body spray',
 'bramble',
 'brownies',
 'bug spray',
 'burger sauce',
 'burgers',
 'butter',
 'cake',
 'candy bars',
 'carrots',
 'cauliflower',
 'cereals',
 'champagne',
 'chicken',
 'chili',
 'chocolate',
 'chocolate bread',
 'chutney',
 'cider',
 'clothes accessories',
 'cookies',
 'cooking oil',
 'corn',
 'cottage cheese',
 'cream',
 'dessert wine',
 'eggplant',
 'eggs',
 'energy bar',
 'energy drink',
 'escalope',
 'extra dark chocolate',
 'flax seed',
 'french fries',
 'french wine',
 'fresh bread',
 'fresh tuna',
 'fromage blanc',
 'frozen smoothie',
 'frozen vegetables',
 'gluten free bar',
 'grated cheese',
 'green beans',
 'green grapes',
 'green tea',
 'ground beef',
 'gums',
 'ham',
 'hand protein bar',
 'herb & pepper',
 'honey',
 'hot dogs',
 'ketchup',
 'light cream',
 'light mayo',
 'low fat yogurt',
 'magazines',
 'mashe

In [24]:
data = pd.DataFrame(encoded_df, columns=te.columns_)
data

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5171,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5172,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,True,False,False,False
5173,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5174,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [26]:
data.replace([True,False],[1,0], inplace=True)

  data.replace([True,False],[1,0], inplace=True)


In [28]:
data.shape


(5176, 120)

### Association Rule Mining:

In [36]:
from mlxtend.frequent_patterns import association_rules,apriori

In [38]:
scores = apriori(data,min_support=0.05, use_colnames=True)
scores



Unnamed: 0,support,itemsets
0,0.113794,(burgers)
1,0.103555,(cake)
2,0.054869,(champagne)
3,0.083849,(chicken)
4,0.205178,(chocolate)
5,0.060665,(cookies)
6,0.07187,(cooking oil)
7,0.208076,(eggs)
8,0.083849,(escalope)
9,0.19262,(french fries)


In [40]:
rules = association_rules(scores, metric="lift", min_threshold=1)
rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(mineral water),(chocolate),0.073223,0.244201,1.190193
1,(chocolate),(mineral water),0.073223,0.356874,1.190193
2,(chocolate),(spaghetti),0.055835,0.272128,1.185635
3,(spaghetti),(chocolate),0.055835,0.243266,1.185635
4,(mineral water),(eggs),0.070131,0.233892,1.12407
5,(eggs),(mineral water),0.070131,0.337047,1.12407
6,(eggs),(spaghetti),0.051391,0.246982,1.076078
7,(spaghetti),(eggs),0.051391,0.223906,1.076078
8,(frozen vegetables),(mineral water),0.050425,0.388393,1.29531
9,(mineral water),(frozen vegetables),0.050425,0.16817,1.29531


In [42]:
rules.sort_values(by = 'lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
13,(spaghetti),(ground beef),0.229521,0.135819,0.055835,0.243266,1.791102,0.024661,1.141988,0.57326
12,(ground beef),(spaghetti),0.135819,0.229521,0.055835,0.411095,1.791102,0.024661,1.308326,0.511102
10,(ground beef),(mineral water),0.135819,0.299845,0.058733,0.432432,1.442184,0.018008,1.233606,0.354795
11,(mineral water),(ground beef),0.299845,0.135819,0.058733,0.195876,1.442184,0.018008,1.074686,0.437914
14,(mineral water),(milk),0.299845,0.170015,0.067813,0.22616,1.330231,0.016835,1.072553,0.354566
15,(milk),(mineral water),0.170015,0.299845,0.067813,0.398864,1.330231,0.016835,1.164718,0.299103
8,(frozen vegetables),(mineral water),0.12983,0.299845,0.050425,0.388393,1.29531,0.011496,1.144778,0.262
9,(mineral water),(frozen vegetables),0.299845,0.12983,0.050425,0.16817,1.29531,0.011496,1.046091,0.32562
17,(milk),(spaghetti),0.170015,0.229521,0.050039,0.294318,1.282316,0.011017,1.091822,0.265259
16,(spaghetti),(milk),0.229521,0.170015,0.050039,0.218013,1.282316,0.011017,1.06138,0.285745


#### Analysis and Interpretation:

#### Rules Summary

1) (spaghetti → ground beef)

Support: 0.0558 (5.58% of transactions include spaghetti and ground beef together)
Confidence: 0.243 (24.3% of transactions with spaghetti also have ground beef)
Lift: 1.79 (Spaghetti is 79% more likely to be bought with ground beef than randomly)
Interpretation: There’s a moderate association between spaghetti and ground beef, which could be useful for cross-promotion strategies.

2) (ground beef → spaghetti)

Support: 0.0558
Confidence: 0.411 (41.1% of ground beef buyers also buy spaghetti) 
Lift: 1.79
Interpretation: Ground beef buyers are more likely to buy spaghetti, which could suggest a strong relation (like spaghetti bolognese).

3) (ground beef → mineral water)

Support: 0.0587
Confidence: 0.432 (43.2% of ground beef buyers also buy mineral water)
Lift: 1.44
Interpretation: There is a fairly strong connection between ground beef and mineral water. Mineral water could be used as an upsell or complementary product for beef buyers.

4) (mineral water → ground beef)

Support: 0.0587
Confidence: 0.195 (19.6% of mineral water buyers buy ground beef)
Lift: 1.44
Interpretation: This rule has a lower confidence, suggesting fewer mineral water buyers choose ground beef, but the lift suggests it’s still a meaningful relationship.

5) (mineral water → milk)

Support: 0.0678 (6.78% of transactions include both mineral water and milk)
Confidence: 0.226 (22.6% of mineral water buyers also buy milk)
Lift: 1.33 (There’s a 33% increased chance that mineral water buyers also purchase milk)
Interpretation: This shows a moderate relationship between mineral water and milk. They could be placed together in promotions.

6) (milk → mineral water)

Support: 0.0678
Confidence: 0.399 (39.9% of milk buyers also buy mineral water)
Lift: 1.33
Interpretation: Milk buyers have a relatively high likelihood of buying mineral water, which could indicate complementary consumption patterns.

7) (mineral water → frozen vegetables)

Support: 0.0504
Confidence: 0.168 (16.8% of mineral water buyers purchase frozen vegetables)
Lift: 1.29
Interpretation: This is a weaker relationship, but the lift still indicates that mineral water buyers are more likely to purchase frozen vegetables than random customers.

8) (frozen vegetables → mineral water)

Support: 0.0504
Confidence: 0.388 (38.8% of frozen vegetable buyers purchase mineral water)
Lift: 1.29
Interpretation: Frozen vegetable buyers have a higher likelihood of purchasing mineral water, which could be used to target health-conscious shoppers.

9) (spaghetti → milk)

Support: 0.0500
Confidence: 0.218 (21.8% of spaghetti buyers also buy milk)
Lift: 1.28
Interpretation: Spaghetti and milk show a moderate relationship, with some potential for promotions or bundling, but it’s not as strong as other relationships in the data.

### Conclusion:

------The most interesting associations seem to be between ground beef and spaghetti (with high confidence and lift values), suggesting a strong relationship between these two items.-----
Mineral water appears frequently with other products like ground beef, milk, and frozen vegetables. This indicates that mineral water is often bought in combination with various other items, possibly making it a good candidate for bundling or cross-promotions.------
Frozen vegetables and mineral water are moderately associated, suggesting an opportunity to target health-conscious buyers with joint offers.
These insights can be used for improving product placements, advertising strategies, or designing special promotions.

## Interview questions


#### 1.What is lift and why is it important in Association rules?

### 2.What is support and Confidence. How do you calculate them?

### 3.What are some limitations or challenges of Association rules mining?

🔹 4. Handling of Rare Items
Problem: Important but rare associations (like fraud patterns) might be missed due to low support.

Solution: Use lower support thresholds, or weighted association rule mining that emphasizes rare but valuable patterns.

🔹 5. Scalability and Performance
Problem: For large datasets with many items, the number of possible itemsets is exponentially large.

Solution: Use optimized algorithms (e.g., Apriori, FP-Growth) or distributed computing.