## Imports and read data

In [1]:
# Standard library
import re
import warnings

# Third-party
import pandas as pd
from IPython.display import HTML, display

# Project-local helpers
from helpers import (
    read_and_describe,
    calculate_frequent_patterns,
    generate_association_rules_hybrid,
)

# Display settings: never truncate cell text (rule strings are long) and
# show at most 37 rows of any frame.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 37)

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices from pandas
# or the helpers are hidden as well.
warnings.filterwarnings("ignore")

# Load the cleaned transactions; the helper also prints the dtype and
# numeric summaries that appear in this cell's output.
df = read_and_describe('cleaned_data.csv')

Data types:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 527507 entries, 0 to 527506
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    527507 non-null  int64         
 1   StockCode    527507 non-null  object        
 2   Description  527507 non-null  object        
 3   Quantity     527507 non-null  int64         
 4   InvoiceDate  527507 non-null  datetime64[ns]
 5   UnitPrice    527507 non-null  float64       
 6   CustomerID   527507 non-null  object        
 7   Country      527507 non-null  object        
 8   TotalPrice   527507 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(4)
memory usage: 36.2+ MB
None

Numeric data description:

           InvoiceNo       Quantity      UnitPrice     TotalPrice
count  527507.000000  527507.000000  527507.000000  527507.000000
mean   559986.722531      10.270061       3.266506      18.992156
std     13435.

### Unifying colors

In [2]:
# Words that are collapsed into the single placeholder "COLORED". Besides
# real colour names the list also carries a few motif words (CHOCOLATE,
# STRAWBERRY, WOODLAND, ROSES, STAR, HEART) that get the same treatment.
colors = [
    "Red", "Green", "Blue", "Yellow", "Orange", "Purple", "Pink", "Brown",
    "Black", "White", "Gray", "Violet", "Indigo", "Magenta", "Cyan",
    "Turquoise", "Teal", "Lime", "Coral", "Navy", "Maroon", "Olive",
    "Silver", "Gold", "Beige", "Mint", "Lavender", "Peach", "Ivory",
    "Charcoal", "Aqua", "Sapphire", "Emerald", "Ruby", "Amber", "CHOCOLATE",
    "STRAWBERRY", "WOODLAND", "ROSES", "STAR", "HEART"
]

# Regular expression source that matches any listed word as a whole word.
# Every entry is escaped so that a future entry containing a regex
# metacharacter is still matched literally.
color_pattern = r'\b(?:' + '|'.join(re.escape(color) for color in colors) + r')\b'

# Compiled once (case-insensitive) so the per-row calls reuse the same
# pattern object instead of handing the pattern string to `re` every time.
_COLOR_REGEX = re.compile(color_pattern, flags=re.IGNORECASE)


def replace_color_with_colored(description: str) -> str:
    """Replace every listed colour/motif word in a description with 'COLORED'.

    Matching is case-insensitive and restricted to whole words, so e.g.
    'HEARTS' is left alone while 'Heart' is replaced. Each occurrence is
    replaced independently; adjacent matches yield 'COLORED COLORED'.

    Parameters
    ----------
    description : str
        Product description text.

    Returns
    -------
    str
        The description with each matched word swapped for 'COLORED'.
    """
    return _COLOR_REGEX.sub('COLORED', description)

# Normalise every description: swap colour/motif words for the COLORED
# placeholder, then trim surrounding whitespace.
normalised_descriptions = df['Description'].map(replace_color_with_colored)
df['Description'] = normalised_descriptions.str.strip()

# Descriptions that differed only by colour are now identical, so collapse
# them into one line per (invoice, description): quantities and totals are
# added up, every other column keeps the value from the first merged row.
line_aggregations = {
    'Quantity': 'sum',
    'InvoiceDate': 'first',
    'StockCode': 'first',
    'UnitPrice': 'first',
    'CustomerID': 'first',
    'Country': 'first',
    'TotalPrice': 'sum',
}
df = (
    df.groupby(['InvoiceNo', 'Description'])
      .agg(line_aggregations)
      .reset_index()
)

# Frequent patterns for different countries

In [3]:
# Countries to mine, in the order their tables are displayed.
countries = [
    'United Kingdom', 'EIRE', 'Belgium', 'France', 'Germany',
    'Netherlands', 'Norway', 'Spain', 'Switzerland', 'Australia',
]

# Minimum support per country, in percent (divided by 100 before use).
# NOTE(review): 'Portugal' has a threshold here but is not in `countries`,
# so the loop below never processes it.
min_support_values = {
    'United Kingdom': 2.5,
    'EIRE': 7,  # Ireland
    'Belgium': 8,
    'France': 6,
    'Germany': 4,
    'Netherlands': 11,
    'Norway': 13,
    'Spain': 6,
    'Switzerland': 13,
    'Australia': 8.5,
    'Portugal': 15,
}

# Rule-generation settings shared by every country.
min_confidence = 0.5
confidence_weight = 0.7

for country in countries:
    # Large heading with the country name above its results.
    display(HTML(f"<div style='font-size: 30px;'>{country}</div>"))

    # Mine frequent itemsets from this country's rows only.
    country_rows = df[df['Country'] == country]
    support_fraction = min_support_values.get(country)/100
    frequent_patterns = calculate_frequent_patterns(country_rows, support_fraction)

    # Build the rules and rank them by the hybrid score, best first.
    rules = generate_association_rules_hybrid(
        frequent_patterns, min_confidence, confidence_weight
    ).sort_values(by='combined_score', ascending=False)

    # Report the total and show the ten highest-scoring rules.
    print(f"Number of rules: {len(rules)}")
    display(rules.reset_index(drop=True).head(10))

Number of rules: 26


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'GARDENERS KNEELING PAD CUP OF TEA'} => {'GARDENERS KNEELING PAD KEEP CALM'},0.030207,0.721333,14.386503,0.804933
1,{'CHARLOTTE BAG COLORED POLKADOT'} => {'COLORED RETROSPOT CHARLOTTE BAG'},0.026745,0.711738,14.116541,0.789291
2,{'CHARLOTTE BAG COLORED POLKADOT'} => {'COLORED CHARLOTTE BAG'},0.026186,0.69688,12.735831,0.733239
3,{'GARDENERS KNEELING PAD KEEP CALM'} => {'GARDENERS KNEELING PAD CUP OF TEA'},0.030207,0.60245,14.386503,0.721715
4,{'COLORED RETROSPOT CHARLOTTE BAG'} => {'COLORED CHARLOTTE BAG'},0.032552,0.645626,11.799139,0.666392
5,{'CHARLOTTE BAG SUKI DESIGN'} => {'COLORED CHARLOTTE BAG'},0.028978,0.645522,11.797251,0.666257
6,{'COLORED RETROSPOT CHARLOTTE BAG'} => {'CHARLOTTE BAG COLORED POLKADOT'},0.026745,0.530454,14.116541,0.662392
7,"{'PAPER CHAIN KIT VINTAGE CHRISTMAS'} => {""PAPER CHAIN KIT 50'S CHRISTMAS""}",0.030039,0.6725,10.7062,0.649068
8,{'COLORED CHARLOTTE BAG'} => {'COLORED RETROSPOT CHARLOTTE BAG'},0.032552,0.594898,11.799139,0.630882
9,{'CHARLOTTE BAG SUKI DESIGN'} => {'COLORED RETROSPOT CHARLOTTE BAG'},0.026521,0.590796,11.717782,0.625321


Number of rules: 9


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'REGENCY MILK JUG COLORED'} => {'REGENCY SUGAR BOWL COLORED'},0.070922,0.8,9.024,0.86
1,{'REGENCY SUGAR BOWL COLORED'} => {'REGENCY MILK JUG COLORED'},0.070922,0.8,9.024,0.86
2,{'REGENCY SUGAR BOWL COLORED'} => {'COLORED REGENCY TEACUP AND SAUCER'},0.074468,0.84,4.644706,0.692553
3,{'REGENCY MILK JUG COLORED'} => {'COLORED REGENCY TEACUP AND SAUCER'},0.070922,0.8,4.423529,0.654682
4,{'REGENCY SUGAR BOWL COLORED'} => {'REGENCY CAKESTAND 3 TIER'},0.074468,0.84,3.384,0.636288
5,{'REGENCY TEA PLATE COLORED'} => {'COLORED REGENCY TEACUP AND SAUCER'},0.08156,0.766667,4.239216,0.623122
6,{'REGENCY TEA PLATE COLORED'} => {'REGENCY CAKESTAND 3 TIER'},0.08156,0.766667,3.088571,0.571769
7,{'COLORED REGENCY TEACUP AND SAUCER'} => {'REGENCY CAKESTAND 3 TIER'},0.113475,0.627451,2.527731,0.449288
8,{'SET OF 3 REGENCY CAKE TINS'} => {'REGENCY CAKESTAND 3 TIER'},0.070922,0.571429,2.302041,0.4


Number of rules: 33


Unnamed: 0,rule,support,confidence,lift,combined_score
0,"{'SPACEBOY LUNCH BOX', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'DOLLY GIRL LUNCH BOX'}",0.132653,0.866667,3.692754,0.838217
1,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'DOLLY GIRL LUNCH BOX'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.091837,1.0,2.578947,0.816523
2,"{'LUNCH BAG COLORED', 'ROUND SNACK BOXES SET OF 4 FRUITS'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.081633,1.0,2.578947,0.816523
3,{'LUNCH BAG COLORED RETROSPOT'} => {'LUNCH BAG COLORED'},0.081633,0.666667,4.355556,0.766667
4,{'SPACEBOY LUNCH BOX'} => {'DOLLY GIRL LUNCH BOX'},0.183673,0.782609,3.334594,0.742387
5,{'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX'},0.183673,0.782609,3.334594,0.742387
6,{'COLORED CHARLOTTE BAG'} => {'COLORED RETROSPOT CHARLOTTE BAG'},0.081633,0.615385,4.307692,0.725826
7,"{'DOLLY GIRL LUNCH BOX', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'SPACEBOY LUNCH BOX'}",0.132653,0.764706,3.258312,0.721978
8,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'SPACEBOY LUNCH BOX'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.091837,0.9,2.321053,0.719889
9,{'COLORED RETROSPOT CHARLOTTE BAG'} => {'COLORED CHARLOTTE BAG'},0.081633,0.571429,4.307692,0.695057


Number of rules: 35


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'CHILDRENS CUTLERY SPACEBOY'} => {'CHILDRENS CUTLERY DOLLY GIRL'},0.065445,0.925926,12.632275,0.948148
1,{'CHILDRENS CUTLERY DOLLY GIRL'} => {'CHILDRENS CUTLERY SPACEBOY'},0.065445,0.892857,12.632275,0.925
2,"{'SET/6 COLORED SPOTTY PAPER CUPS', 'SET/20 COLORED RETROSPOT PAPER NAPKINS'} => {'SET/6 COLORED SPOTTY PAPER PLATES'}",0.102094,0.975,7.449,0.814467
3,"{'SET/6 COLORED SPOTTY PAPER PLATES', 'SET/20 COLORED RETROSPOT PAPER NAPKINS'} => {'SET/6 COLORED SPOTTY PAPER CUPS'}",0.102094,0.975,6.897222,0.79658
4,{'SET/6 COLORED SPOTTY PAPER PLATES'} => {'SET/6 COLORED SPOTTY PAPER CUPS'},0.125654,0.96,6.791111,0.78264
5,{'SET/6 COLORED SPOTTY PAPER CUPS'} => {'SET/6 COLORED SPOTTY PAPER PLATES'},0.125654,0.888889,6.791111,0.732862
6,"{'SET/6 COLORED SPOTTY PAPER PLATES'} => {'SET/6 COLORED SPOTTY PAPER CUPS', 'SET/20 COLORED RETROSPOT PAPER NAPKINS'}",0.102094,0.78,7.449,0.677967
7,"{'SET/6 COLORED SPOTTY PAPER PLATES', 'SET/6 COLORED SPOTTY PAPER CUPS'} => {'SET/20 COLORED RETROSPOT PAPER NAPKINS'}",0.102094,0.8125,5.96875,0.65273
8,{'SET/6 COLORED SPOTTY PAPER PLATES'} => {'SET/20 COLORED RETROSPOT PAPER NAPKINS'},0.104712,0.8,5.876923,0.641003
9,"{'SET/6 COLORED SPOTTY PAPER CUPS'} => {'SET/6 COLORED SPOTTY PAPER PLATES', 'SET/20 COLORED RETROSPOT PAPER NAPKINS'}",0.102094,0.722222,6.897222,0.619635


Number of rules: 27


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'SET/6 COLORED SPOTTY PAPER CUPS'} => {'SET/6 COLORED SPOTTY PAPER PLATES'},0.047404,0.875,14.908654,0.893991
1,{'CHILDRENS CUTLERY SPACEBOY'} => {'CHILDRENS CUTLERY DOLLY GIRL'},0.040632,0.818182,15.758893,0.872727
2,{'CHILDRENS CUTLERY DOLLY GIRL'} => {'CHILDRENS CUTLERY SPACEBOY'},0.040632,0.782609,15.758893,0.847826
3,{'SET/6 COLORED SPOTTY PAPER PLATES'} => {'SET/6 COLORED SPOTTY PAPER CUPS'},0.047404,0.807692,14.908654,0.846876
4,{'COLORED RETROSPOT CHARLOTTE BAG'} => {'COLORED CHARLOTTE BAG'},0.063205,0.875,6.460417,0.710084
5,{'CHARLOTTE BAG SUKI DESIGN'} => {'COLORED CHARLOTTE BAG'},0.040632,0.857143,6.328571,0.694714
6,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'PLASTERS IN TIN COLORED ANIMALS'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.042889,0.863636,3.41599,0.635856
7,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'COLORED CHARLOTTE BAG'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.040632,0.857143,3.390306,0.630751
8,{'ROUND SNACK BOXES SET OF 4 FRUITS'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.13544,0.833333,3.296131,0.612034
9,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'PLASTERS IN TIN CIRCUS PARADE'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.042889,0.826087,3.267469,0.606338


Number of rules: 34


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'PLASTERS IN TIN SPACEBOY'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.129032,1.0,3.72,0.969617
1,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'SPACEBOY LUNCH BOX'} => {'DOLLY GIRL LUNCH BOX'}",0.11828,0.916667,3.875,0.941667
2,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX'}",0.11828,1.0,3.321429,0.891489
3,"{'ROUND SNACK BOXES SET OF 4 FRUITS', 'SPACEBOY LUNCH BOX'} => {'ROUND SNACK BOXES SET OF4 COLORED'}",0.11828,0.916667,3.41,0.850518
4,{'COLORED RETROSPOT CHARLOTTE BAG'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.11828,0.916667,3.41,0.850518
5,{'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX'},0.225806,0.954545,3.170455,0.830077
6,"{'SPACEBOY LUNCH BOX', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'DOLLY GIRL LUNCH BOX'}",0.16129,0.833333,3.522727,0.814281
7,"{'DOLLY GIRL LUNCH BOX', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'SPACEBOY LUNCH BOX'}",0.16129,0.9375,3.113839,0.807048
8,{'ROUND SNACK BOXES SET OF 4 FRUITS'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.16129,0.882353,3.282353,0.801477
9,{'COLORED TOADSTOOL LED NIGHT LIGHT'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.129032,0.857143,3.188571,0.765447


Number of rules: 28


Unnamed: 0,rule,support,confidence,lift,combined_score
0,"{'HOT WATER BOTTLE TEA AND SYMPATHY'} => {'RECIPE BOX RETROSPOT', 'RECIPE BOX PANTRY COLORED DESIGN'}",0.15625,1.0,6.4,1.0
1,"{'RECIPE BOX RETROSPOT'} => {'HOT WATER BOTTLE TEA AND SYMPATHY', 'RECIPE BOX PANTRY COLORED DESIGN'}",0.15625,1.0,6.4,1.0
2,"{'HOT WATER BOTTLE TEA AND SYMPATHY', 'RECIPE BOX PANTRY COLORED DESIGN'} => {'RECIPE BOX RETROSPOT'}",0.15625,1.0,6.4,1.0
3,"{'RECIPE BOX RETROSPOT', 'RECIPE BOX PANTRY COLORED DESIGN'} => {'HOT WATER BOTTLE TEA AND SYMPATHY'}",0.15625,1.0,6.4,1.0
4,{'RECIPE BOX RETROSPOT'} => {'HOT WATER BOTTLE TEA AND SYMPATHY'},0.15625,1.0,6.4,1.0
5,{'HOT WATER BOTTLE TEA AND SYMPATHY'} => {'RECIPE BOX RETROSPOT'},0.15625,1.0,6.4,1.0
6,"{'RECIPE BOX RETROSPOT', 'HOT WATER BOTTLE TEA AND SYMPATHY'} => {'RECIPE BOX PANTRY COLORED DESIGN'}",0.15625,1.0,5.333333,0.936413
7,{'HOT WATER BOTTLE TEA AND SYMPATHY'} => {'RECIPE BOX PANTRY COLORED DESIGN'},0.15625,1.0,5.333333,0.936413
8,"{'COLORED TOADSTOOL LED NIGHT LIGHT', 'CHILDS BREAKFAST SET DOLLY GIRL'} => {'CHILDS BREAKFAST SET SPACEBOY'}",0.15625,1.0,5.333333,0.936413
9,{'RECIPE BOX RETROSPOT'} => {'RECIPE BOX PANTRY COLORED DESIGN'},0.15625,1.0,5.333333,0.936413


Number of rules: 31


Unnamed: 0,rule,support,confidence,lift,combined_score
0,"{""POPPY'S PLAYHOUSE KITCHEN""} => {""POPPY'S PLAYHOUSE BEDROOM""}",0.079545,1.0,11.0,1.0
1,"{""POPPY'S PLAYHOUSE BEDROOM""} => {""POPPY'S PLAYHOUSE KITCHEN""}",0.079545,0.875,11.0,0.9125
2,"{'6 RIBBONS RUSTIC CHARM', 'PACK OF 72 RETROSPOT CAKE CASES'} => {'ASSORTED COLOUR BIRD ORNAMENT'}",0.068182,1.0,7.333333,0.877778
3,{'SET/5 COLORED RETROSPOT LID GLASS BOWLS'} => {'JAM MAKING SET WITH JARS'},0.068182,1.0,6.769231,0.858974
4,{'SET OF 6 GIRLS CELEBRATION CANDLES'} => {'SET/10 COLORED POLKADOT PARTY CANDLES'},0.068182,0.857143,9.428571,0.847619
5,"{'ASSORTED COLOUR BIRD ORNAMENT', 'PACK OF 72 RETROSPOT CAKE CASES'} => {'6 RIBBONS RUSTIC CHARM'}",0.068182,1.0,5.866667,0.828889
6,{'PLASTERS IN TIN CIRCUS PARADE'} => {'PLASTERS IN TIN SKULLS'},0.068182,0.857143,7.542857,0.784762
7,{'SET/10 COLORED POLKADOT PARTY CANDLES'} => {'SET OF 6 GIRLS CELEBRATION CANDLES'},0.068182,0.75,9.428571,0.772619
8,{'COLORED RETROSPOT TAPE'} => {'6 RIBBONS RUSTIC CHARM'},0.068182,0.857143,5.028571,0.700952
9,{'LUNCH BAG COLORED RETROSPOT'} => {'LUNCH BAG COLORED POLKADOT'},0.068182,0.75,6.6,0.678333


Number of rules: 34


Unnamed: 0,rule,support,confidence,lift,combined_score
0,{'COLORED RETROSPOT BOWL'} => {'COLORED POLKADOT BOWL'},0.16,1.0,5.0,1.0
1,{'COLORED POLKADOT BOWL'} => {'COLORED RETROSPOT BOWL'},0.16,0.8,5.0,0.86
2,{'COLORED TOADSTOOL LED NIGHT LIGHT'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.16,1.0,2.941176,0.825
3,"{'PLASTERS IN TIN CIRCUS PARADE', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'PLASTERS IN TIN SPACEBOY'}",0.14,1.0,2.777778,0.811111
4,{'PLASTERS IN TIN CIRCUS PARADE'} => {'PLASTERS IN TIN SPACEBOY'},0.2,0.909091,2.525253,0.72601
5,"{'PLASTERS IN TIN SPACEBOY', 'ROUND SNACK BOXES SET OF4 COLORED'} => {'PLASTERS IN TIN CIRCUS PARADE'}",0.14,0.777778,3.535354,0.719949
6,{'ROUND SNACK BOXES SET OF 4 FRUITS'} => {'ROUND SNACK BOXES SET OF4 COLORED'},0.16,0.888889,2.614379,0.719444
7,"{'PLASTERS IN TIN COLORED ANIMALS', 'PLASTERS IN TIN CIRCUS PARADE'} => {'PLASTERS IN TIN SPACEBOY'}",0.16,0.888889,2.469136,0.707099
8,{'PLASTERS IN TIN CIRCUS PARADE'} => {'PLASTERS IN TIN COLORED ANIMALS'},0.18,0.818182,2.406417,0.652273
9,"{'PLASTERS IN TIN CIRCUS PARADE', 'PLASTERS IN TIN SPACEBOY'} => {'PLASTERS IN TIN COLORED ANIMALS'}",0.16,0.8,2.352941,0.635


Number of rules: 74


Unnamed: 0,rule,support,confidence,lift,combined_score
0,"{'DOLLY GIRL LUNCH BOX', 'REGENCY CAKESTAND 3 TIER'} => {'COLORED REGENCY TEACUP AND SAUCER', 'SPACEBOY LUNCH BOX'}",0.089286,1.0,11.2,1.0
1,"{'SPACEBOY LUNCH BOX', 'REGENCY CAKESTAND 3 TIER'} => {'COLORED REGENCY TEACUP AND SAUCER', 'DOLLY GIRL LUNCH BOX'}",0.089286,1.0,11.2,1.0
2,"{'COLORED REGENCY TEACUP AND SAUCER', 'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX', 'REGENCY CAKESTAND 3 TIER'}",0.089286,1.0,11.2,1.0
3,"{'COLORED REGENCY TEACUP AND SAUCER', 'SPACEBOY LUNCH BOX'} => {'DOLLY GIRL LUNCH BOX', 'REGENCY CAKESTAND 3 TIER'}",0.089286,1.0,11.2,1.0
4,"{'COLORED REGENCY TEACUP AND SAUCER', 'REGENCY CAKESTAND 3 TIER'} => {'DOLLY GIRL LUNCH BOX'}",0.089286,1.0,9.333333,0.927273
5,"{'COLORED REGENCY TEACUP AND SAUCER', 'DOLLY GIRL LUNCH BOX'} => {'REGENCY CAKESTAND 3 TIER'}",0.089286,1.0,9.333333,0.927273
6,"{'CIRCUS PARADE LUNCH BOX', 'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX'}",0.089286,1.0,9.333333,0.927273
7,"{'SPACEBOY LUNCH BOX', 'CIRCUS PARADE LUNCH BOX'} => {'DOLLY GIRL LUNCH BOX'}",0.089286,1.0,9.333333,0.927273
8,"{'COLORED REGENCY TEACUP AND SAUCER', 'SPACEBOY LUNCH BOX'} => {'DOLLY GIRL LUNCH BOX'}",0.089286,1.0,9.333333,0.927273
9,"{'COLORED REGENCY TEACUP AND SAUCER', 'DOLLY GIRL LUNCH BOX'} => {'SPACEBOY LUNCH BOX'}",0.089286,1.0,9.333333,0.927273


#### The tables above show the top association rules (up to 10) for each country
#### Each country's minimum support was chosen by trial and error, mainly to generate a reasonable amount of itemsets (not very low, not huge)
#### Minimum confidence is 0.5
#### The association rules were sorted by a hybrid score combining confidence and lift, in which confidence has a 70% weight