# UK_FP

In [34]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)
data.head(5)

Unnamed: 0,InvoiceNo,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,WHITE HANGING HEART T-LIGHT HOLDER,6,01-12-2010 08:26,2.55,17850.0,United Kingdom
1,536365,WHITE METAL LANTERN,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
2,536365,CREAM CUPID HEARTS COAT HANGER,8,01-12-2010 08:26,2.75,17850.0,United Kingdom
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
4,536365,RED WOOLLY HOTTIE WHITE HEART.,6,01-12-2010 08:26,3.39,17850.0,United Kingdom


In [35]:
UK_data = data[data['Country'] == 'United Kingdom']

# 移除含有NaN值的行（特別是在'Description'列中）
UK_data = UK_data.dropna(subset=['Description'])

# 將所有描述轉換為字符串類型
UK_data['Description'] = UK_data['Description'].astype(str)

# 按InvoiceNo分組，並將每組的描述字段合併為一個字串
UK_grouped_data = UK_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
UK_grouped_data.head()

Unnamed: 0,InvoiceNo,Description
0,536365,"WHITE HANGING HEART T-LIGHT HOLDER, WHITE META..."
1,536366,"HAND WARMER UNION JACK, HAND WARMER RED POLKA DOT"
2,536367,"ASSORTED COLOUR BIRD ORNAMENT, POPPY'S PLAYHOU..."
3,536368,"JAM MAKING SET WITH JARS, RED COAT RACK PARIS ..."
4,536369,BATH BUILDING BLOCK WORD


In [36]:
# Splitting the Description column into individual items
UK_grouped_data['Items'] = UK_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = UK_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = fpgrowth(df, min_support=0.01, use_colnames=True)

# Generating the association rules
UK_FP_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
fp_growth_time = time.time() - start_time
print(f"Time taken for FP-growth: {fp_growth_time} seconds")

Time taken for FP-growth: 2.279085159301758 seconds


In [37]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
UK_FP_rules = UK_FP_rules[['antecedents', 'consequents', 'confidence', 'lift']]
UK_FP_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( POPPY'S PLAYHOUSE KITCHEN),( POPPY'S PLAYHOUSE BEDROOM ),0.694595,42.762192
1,( POPPY'S PLAYHOUSE BEDROOM ),( POPPY'S PLAYHOUSE KITCHEN),0.717877,42.762192
2,( RED HANGING HEART T-LIGHT HOLDER),( WHITE HANGING HEART T-LIGHT HOLDER),0.623123,6.836055
3,( JUMBO STORAGE BAG SUKI),( JUMBO BAG RED RETROSPOT),0.609756,7.140821
4,"( JUMBO SHOPPER VINTAGE RED PAISLEY, JUMBO ST...",( JUMBO BAG RED RETROSPOT),0.741344,8.681842
...,...,...,...,...
237,( REGENCY TEA PLATE ROSES ),( REGENCY TEA PLATE PINK),0.605479,50.740559
238,( REGENCY TEA PLATE PINK),( REGENCY TEA PLATE ROSES ),0.840304,50.740559
239,( GARDENERS KNEELING PAD CUP OF TEA ),( GARDENERS KNEELING PAD KEEP CALM ),0.698910,17.870044
240,( JUMBO BAG VINTAGE CHRISTMAS ),( JUMBO BAG 50'S CHRISTMAS ),0.664384,23.206044


In [38]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = UK_FP_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
103,( FANCY FONT BIRTHDAY CARD),( ),1.0,41.042831
54,( AIRLINE LOUNGE),(METAL SIGN),1.0,76.0
173,"(COFFEE, SET 3 RETROSPOT TEA)",(SUGAR),1.0,57.696335
169,( SET 3 RETROSPOT TEA),(SUGAR),1.0,57.696335
177,( SET 3 RETROSPOT TEA),"(COFFEE, SUGAR)",1.0,57.696335


# UK_Apriori

In [40]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)

UK_data = data[data['Country'] == 'United Kingdom']

# 移除含有NaN值的行（特別是在'Description'列中）
UK_data = UK_data.dropna(subset=['Description'])

# 將所有描述轉換為字符串類型
UK_data['Description'] = UK_data['Description'].astype(str)

# 按InvoiceNo分組，並將每組的描述字段合併為一個字串
UK_grouped_data = UK_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
UK_grouped_data.head()

Unnamed: 0,InvoiceNo,Description
0,536365,"WHITE HANGING HEART T-LIGHT HOLDER, WHITE META..."
1,536366,"HAND WARMER UNION JACK, HAND WARMER RED POLKA DOT"
2,536367,"ASSORTED COLOUR BIRD ORNAMENT, POPPY'S PLAYHOU..."
3,536368,"JAM MAKING SET WITH JARS, RED COAT RACK PARIS ..."
4,536369,BATH BUILDING BLOCK WORD


In [41]:
# Splitting the Description column into individual items
UK_grouped_data['Items'] = UK_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = UK_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

# Generating the association rules
UK_apriori_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
apriori_time = time.time() - start_time
print(f"Time taken for apriori: {apriori_time} seconds")

Time taken for apriori: 39.11946702003479 seconds


In [42]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
UK_apriori_rules = UK_apriori_rules[['antecedents', 'consequents', 'confidence', 'lift']]
UK_apriori_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( BIRTHDAY CARD),( ),0.648000,26.595754
1,( FANCY FONT BIRTHDAY CARD),( ),1.000000,41.042831
2,( AIRLINE LOUNGE),(METAL SIGN),1.000000,76.000000
3,(METAL SIGN),( AIRLINE LOUNGE),0.986207,76.000000
4,( ALARM CLOCK BAKELIKE CHOCOLATE),( ALARM CLOCK BAKELIKE GREEN),0.625337,16.119796
...,...,...,...,...
237,"( WOODLAND CHARLOTTE BAG, STRAWBERRY CHARLOTT...","( CHARLOTTE BAG SUKI DESIGN, RED RETROSPOT CH...",0.638122,30.574345
238,"( JUMBO SHOPPER VINTAGE RED PAISLEY, JUMBO ST...",( JUMBO BAG PINK POLKADOT),0.673077,13.186325
239,"( JUMBO BAG PINK POLKADOT, JUMBO SHOPPER VINT...",( JUMBO STORAGE BAG SUKI),0.700000,13.936766
240,"( JUMBO BAG PINK POLKADOT, JUMBO STORAGE BAG ...",( JUMBO SHOPPER VINTAGE RED PAISLEY),0.638021,12.679873


In [43]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = UK_apriori_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
214,"(COFFEE, SET 3 RETROSPOT TEA)",(SUGAR),1.0,57.696335
2,( AIRLINE LOUNGE),(METAL SIGN),1.0,76.0
72,(SUGAR),(COFFEE),1.0,46.893617
216,"( SET 3 RETROSPOT TEA, SUGAR)",(COFFEE),1.0,46.893617
1,( FANCY FONT BIRTHDAY CARD),( ),1.0,41.042831


# Germany_FP

In [44]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)
data.head(5)

Unnamed: 0,InvoiceNo,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,WHITE HANGING HEART T-LIGHT HOLDER,6,01-12-2010 08:26,2.55,17850.0,United Kingdom
1,536365,WHITE METAL LANTERN,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
2,536365,CREAM CUPID HEARTS COAT HANGER,8,01-12-2010 08:26,2.75,17850.0,United Kingdom
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
4,536365,RED WOOLLY HOTTIE WHITE HEART.,6,01-12-2010 08:26,3.39,17850.0,United Kingdom


In [45]:
Germany_data = data[data['Country'] == 'Germany']

# 移除含有NaN值的行（特別是在'Description'列中）
Germany_data = Germany_data.dropna(subset=['Description'])

# 將所有描述轉換為字符串類型
Germany_data['Description'] = Germany_data['Description'].astype(str)

# 按InvoiceNo分組，並將每組的描述字段合併為一個字串
Germany_grouped_data = Germany_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
Germany_grouped_data.head()

Unnamed: 0,InvoiceNo,Description
0,536527,"SET OF 6 T-LIGHTS SANTA, ROTATING SILVER ANGEL..."
1,536840,"JAM MAKING SET PRINTED, JAM JAR WITH PINK LID,..."
2,536861,"FELTCRAFT 6 FLOWER FRIENDS, 6 RIBBONS RUSTIC C..."
3,536967,"POSTAGE, JUMBO BAG RED RETROSPOT"
4,536983,"WOODLAND PARTY BAG + STICKER SET, HAND WARMER ..."


In [46]:
# Splitting the Description column into individual items
Germany_grouped_data['Items'] = Germany_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = Germany_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = fpgrowth(df, min_support=0.01, use_colnames=True)

# Generating the association rules
Germany_FP_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
fp_growth_time = time.time() - start_time
print(f"Time taken for FP-growth: {fp_growth_time} seconds")

Time taken for FP-growth: 0.07642793655395508 seconds


In [47]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
Germany_FP_rules = Germany_FP_rules[['antecedents', 'consequents', 'confidence', 'lift']]
Germany_FP_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( JUMBO BAG WOODLAND ANIMALS),( POSTAGE),0.883721,1.522525
1,"( ROUND SNACK BOXES SET OF4 WOODLAND , JUMBO ...",( POSTAGE),0.933333,1.608000
2,"( JUMBO BAG WOODLAND ANIMALS, ROUND SNACK BOX...",( POSTAGE),1.000000,1.722857
3,"( WOODLAND CHARLOTTE BAG, JUMBO BAG WOODLAND ...",( POSTAGE),0.944444,1.627143
4,"( JUMBO BAG WOODLAND ANIMALS, PLASTERS IN TIN...",( POSTAGE),0.818182,1.409610
...,...,...,...,...
2523,"( ROUND SNACK BOXES SET OF4 WOODLAND , SPACEB...",( POSTAGE),1.000000,1.722857
2524,( VINTAGE DOILY TRAVEL SEWING KIT),( POSTAGE),1.000000,1.722857
2525,( SPACEBOY ROCKET LOLLY MAKERS),( POSTAGE),1.000000,1.722857
2526,( MINI LIGHTS WOODLAND MUSHROOMS),( POSTAGE),0.833333,1.435714


In [48]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = Germany_FP_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
1264,"( WHITE SPOT BLUE CERAMIC DRAWER KNOB, BLUE S...",( POSTAGE),1.0,1.722857
1591,"( POSTAGE, RED SPOT CERAMIC DRAWER KNOB, RED...",( WHITE SPOT RED CERAMIC DRAWER KNOB),1.0,25.125
1584,"( WHITE SPOT BLUE CERAMIC DRAWER KNOB, RED SP...",( POSTAGE),1.0,1.722857
453,"( 6 RIBBONS RUSTIC CHARM, BLUE HARMONICA IN B...",( POSTAGE),1.0,1.722857
1574,"( WHITE SPOT BLUE CERAMIC DRAWER KNOB, RED SP...",( WHITE SPOT RED CERAMIC DRAWER KNOB),1.0,25.125


# Germany_Apriori

In [49]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)

Germany_data = data[data['Country'] == 'Germany']

# 移除含有NaN值的行（特別是在'Description'列中）
Germany_data = Germany_data.dropna(subset=['Description'])

# 將所有描述轉換為字符串類型
Germany_data['Description'] = Germany_data['Description'].astype(str)

# 按InvoiceNo分組，並將每組的描述字段合併為一個字串
Germany_grouped_data = Germany_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
Germany_grouped_data.head()

Unnamed: 0,InvoiceNo,Description
0,536527,"SET OF 6 T-LIGHTS SANTA, ROTATING SILVER ANGEL..."
1,536840,"JAM MAKING SET PRINTED, JAM JAR WITH PINK LID,..."
2,536861,"FELTCRAFT 6 FLOWER FRIENDS, 6 RIBBONS RUSTIC C..."
3,536967,"POSTAGE, JUMBO BAG RED RETROSPOT"
4,536983,"WOODLAND PARTY BAG + STICKER SET, HAND WARMER ..."


In [50]:
# Splitting the Description column into individual items
Germany_grouped_data['Items'] = Germany_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = Germany_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

# Generating the association rules
Germany_apriori_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
apriori_time = time.time() - start_time
print(f"Time taken for apriori: {apriori_time} seconds")

Time taken for apriori: 0.4527277946472168 seconds


In [51]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
Germany_apriori_rules = Germany_apriori_rules[['antecedents', 'consequents', 'confidence', 'lift']]
Germany_apriori_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( 10 COLOUR SPACEBOY PEN),( POSTAGE),1.000000,1.722857
1,( 12 PENCIL SMALL TUBE WOODLAND),( POSTAGE),0.777778,1.340000
2,( 12 PENCILS TALL TUBE WOODLAND),( POSTAGE),0.800000,1.378286
3,( 3 HOOK HANGER MAGIC GARDEN),( POSTAGE),0.705882,1.216134
4,( 3 PIECE SPACEBOY COOKIE CUTTER SET),( POSTAGE),0.750000,1.292143
...,...,...,...,...
2523,"( RED SPOT CERAMIC DRAWER KNOB, BLUE SPOT CER...","( POSTAGE, RED STRIPE CERAMIC DRAWER KNOB, W...",0.777778,52.111111
2524,"( BLUE SPOT CERAMIC DRAWER KNOB, WHITE SPOT R...","( POSTAGE, RED SPOT CERAMIC DRAWER KNOB, RED...",0.700000,60.300000
2525,"( WHITE SPOT BLUE CERAMIC DRAWER KNOB, RED ST...","( BLUE SPOT CERAMIC DRAWER KNOB, POSTAGE, RE...",0.700000,46.900000
2526,"( BLUE SPOT CERAMIC DRAWER KNOB, RED STRIPE C...","( POSTAGE, RED SPOT CERAMIC DRAWER KNOB, WHI...",0.777778,58.625000


In [52]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = Germany_apriori_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
0,( 10 COLOUR SPACEBOY PEN),( POSTAGE),1.0,1.722857
621,"( PINK VINTAGE SPOT BEAKER, GREEN VINTAGE SPO...",( BLUE VINTAGE SPOT BEAKER),1.0,43.071429
1760,"( SCANDINAVIAN REDS RIBBONS, CHOCOLATE BOX RI...",( ROUND SNACK BOXES SET OF4 WOODLAND ),1.0,5.432432
1758,"( SCANDINAVIAN REDS RIBBONS, ROUND SNACK BOXE...",( ROUND SNACK BOXES SET OF 4 FRUITS ),1.0,8.148649
630,"( POSTAGE, PINK VINTAGE SPOT BEAKER)",( BLUE VINTAGE SPOT BEAKER),1.0,43.071429


# France_FP

In [53]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)
data.head(5)

Unnamed: 0,InvoiceNo,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,WHITE HANGING HEART T-LIGHT HOLDER,6,01-12-2010 08:26,2.55,17850.0,United Kingdom
1,536365,WHITE METAL LANTERN,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
2,536365,CREAM CUPID HEARTS COAT HANGER,8,01-12-2010 08:26,2.75,17850.0,United Kingdom
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE,6,01-12-2010 08:26,3.39,17850.0,United Kingdom
4,536365,RED WOOLLY HOTTIE WHITE HEART.,6,01-12-2010 08:26,3.39,17850.0,United Kingdom


In [54]:
France_data = data[data['Country'] == 'France']

France_data = France_data.dropna(subset=['Description'])
France_data['Description'] = France_data['Description'].astype(str)

France_grouped_data = France_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
France_grouped_data.head()


Unnamed: 0,InvoiceNo,Description
0,536370,"ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELIK..."
1,536852,"PICTURE DOMINOES, MINI JIGSAW SPACEBOY, MINI J..."
2,536974,"EDWARDIAN PARASOL BLACK, EDWARDIAN PARASOL PIN..."
3,537065,"HOT WATER BOTTLE BABUSHKA , BREAD BIN DINER ST..."
4,537463,"JAM MAKING SET PRINTED, SET/4 SKULL BADGES, RO..."


In [55]:
# Splitting the Description column into individual items
France_grouped_data['Items'] = France_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = France_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = fpgrowth(df, min_support=0.01, use_colnames=True)

# Generating the association rules
France_FP_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
fp_growth_time = time.time() - start_time
print(f"Time taken for FP-growth: {fp_growth_time} seconds")

Time taken for FP-growth: 0.5657899379730225 seconds


In [56]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
France_FP_rules = France_FP_rules[['antecedents', 'consequents', 'confidence', 'lift']]
France_FP_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( RED TOADSTOOL LED NIGHT LIGHT),( POSTAGE),0.809524,1.357056
1,"( RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN ...",( POSTAGE),0.947368,1.588134
2,"( POSTAGE, RED TOADSTOOL LED NIGHT LIGHT, PL...",( PLASTERS IN TIN CIRCUS PARADE ),0.611111,4.204809
3,"( PLASTERS IN TIN CIRCUS PARADE , RED TOADSTO...",( POSTAGE),1.000000,1.676364
4,"( PLASTERS IN TIN CIRCUS PARADE , RED TOADSTO...",( POSTAGE),1.000000,1.676364
...,...,...,...,...
38211,( MINI LIGHTS WOODLAND MUSHROOMS),( RABBIT NIGHT LIGHT),0.625000,5.649510
38212,( MINI LIGHTS WOODLAND MUSHROOMS),( POSTAGE),0.750000,1.257273
38213,"( MINI LIGHTS WOODLAND MUSHROOMS, RABBIT NIGH...",( POSTAGE),0.700000,1.173455
38214,"( RED TOADSTOOL LED NIGHT LIGHT, MINI LIGHTS ...",( RABBIT NIGHT LIGHT),0.833333,7.532680


In [57]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = France_FP_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
28875,"( PLASTERS IN TIN SKULLS, SPACEBOY LUNCH BOX ...","( PLASTERS IN TIN WOODLAND ANIMALS, CIRCUS PA...",1.0,51.222222
14962,"( ALARM CLOCK BAKELIKE ORANGE, JUMBO BAG SPAC...",( PLASTERS IN TIN SPACEBOY),1.0,8.537037
8176,"( LUNCH BOX I LOVE LONDON, PLASTERS IN TIN CI...",( PLASTERS IN TIN SPACEBOY),1.0,8.537037
29609,"( ALARM CLOCK BAKELIKE CHOCOLATE, PLASTERS IN...",( POSTAGE),1.0,1.676364
8178,"( LUNCH BOX I LOVE LONDON, PLASTERS IN TIN SP...",( PLASTERS IN TIN CIRCUS PARADE ),1.0,6.880597


# France_Apriori

In [58]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import time

file_path = 'OnlineRetail_1225.csv'
data = pd.read_csv(file_path)

France_data = data[data['Country'] == 'France']

France_data = France_data.dropna(subset=['Description'])
France_data['Description'] = France_data['Description'].astype(str)

France_grouped_data = France_data.groupby('InvoiceNo')['Description'].apply(lambda x: ', '.join(x)).reset_index()
France_grouped_data.head()

Unnamed: 0,InvoiceNo,Description
0,536370,"ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELIK..."
1,536852,"PICTURE DOMINOES, MINI JIGSAW SPACEBOY, MINI J..."
2,536974,"EDWARDIAN PARASOL BLACK, EDWARDIAN PARASOL PIN..."
3,537065,"HOT WATER BOTTLE BABUSHKA , BREAD BIN DINER ST..."
4,537463,"JAM MAKING SET PRINTED, SET/4 SKULL BADGES, RO..."


In [59]:
# Splitting the Description column into individual items
France_grouped_data['Items'] = France_grouped_data['Description'].str.split(',')


# Start the timer for FP-growth algorithm
start_time = time.time()

# Extracting the list of transactions
transactions = France_grouped_data['Items'].tolist()

# TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# FP-growth 
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

# Generating the association rules
France_apriori_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)


# Stop the timer and print the time taken for FP-growth
apriori_time = time.time() - start_time
print(f"Time taken for apriori: {apriori_time} seconds")

Time taken for apriori: 1.9229347705841064 seconds


In [60]:
# Calculating confidence and lift
# These metrics are already included in the rules DataFrame
France_apriori_rules = France_apriori_rules[['antecedents', 'consequents', 'confidence', 'lift']]
France_apriori_rules

Unnamed: 0,antecedents,consequents,confidence,lift
0,( BIRTHDAY CARD),( ),0.857143,43.904762
1,( ),( BIRTHDAY CARD),0.666667,43.904762
2,( ELEPHANT),( ),1.000000,51.222222
3,( ),( ELEPHANT),0.666667,51.222222
4,( ),( POSTAGE),0.777778,1.303838
...,...,...,...,...
38211,"( PACK OF 6 SKULL PAPER PLATES, SET OF 9 HEAR...","( PACK OF 6 SKULL PAPER CUPS, SET/6 RED SPOTT...",1.000000,46.100000
38212,"( PACK OF 6 SKULL PAPER CUPS, SET OF 9 HEART ...","( SET/6 RED SPOTTY PAPER PLATES, SET/20 RED R...",0.625000,28.812500
38213,"( SET OF 9 HEART SHAPED BALLOONS, SET/20 RED ...","( PACK OF 6 SKULL PAPER CUPS, SET/6 RED SPOTT...",0.625000,28.812500
38214,"( PACK OF 6 SKULL PAPER PLATES, SET OF 9 HEAR...","( PACK OF 6 SKULL PAPER CUPS, SET/6 RED SPOTT...",0.625000,32.013889


In [61]:
# 對規則按confidence進行降序排序
sorted_rules_by_confidence = France_apriori_rules.sort_values(by='confidence', ascending=False)

# 選取前幾條規則
top_10_rules_by_confidence = sorted_rules_by_confidence.head(5)
top_10_rules_by_confidence

Unnamed: 0,antecedents,consequents,confidence,lift
18453,"( ALARM CLOCK BAKELIKE PINK, PLASTERS IN TIN ...",( ALARM CLOCK BAKELIKE GREEN),1.0,12.459459
14231,"( SET OF 9 BLACK SKULL BALLOONS, PACK OF 6 SK...",( PACK OF 6 SKULL PAPER PLATES),1.0,20.954545
7953,"( ALARM CLOCK BAKELIKE ORANGE, SPACEBOY LUNCH...",( ALARM CLOCK BAKELIKE RED ),1.0,12.805556
30047,"( ALARM CLOCK BAKELIKE PINK, PLASTERS IN TIN ...","( PLASTERS IN TIN CIRCUS PARADE , ALARM CLOCK...",1.0,35.461538
14229,"( PACK OF 6 SKULL PAPER PLATES, RETROSPOT PAR...","( PACK OF 20 SKULL PAPER NAPKINS, PACK OF 6 S...",1.0,28.8125
