In [1]:
#!pip install -r requirements.txt
#!pip install pyfim==6.28

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import sparse

from itertools import permutations,combinations
from collections import Counter

from fim import arules, fpgrowth

In [3]:
path = "..../"
articles = pd.read_csv(path + "articles.csv")
customers = pd.read_csv(path + "customers.csv")
transactions = pd.read_csv(path + "transactions_train.csv")

In [20]:
# set an article name that contains colour scheme and graphical appearance
articles["article_name"] = articles["colour_group_name"] + " " + articles["graphical_appearance_name"] + " " + articles["prod_name"]

In [5]:
# merge relevant article data into transactions
transactions_merged = pd.merge(transactions[["customer_id", "article_id", "t_dat"]],
                      articles[["article_id","article_name", "product_type_name", "product_group_name"]], on="article_id")

Analysis by article granularity

In [6]:
# remove multiple identical items bought in the same transaction
transactions_merged2=transactions_merged.drop_duplicates(["t_dat", "customer_id", "article_name"])

In [82]:
transactions_merged2.head()

Unnamed: 0,customer_id,article_id,t_dat,article_name,product_type_name,product_group_name
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001,2018-09-20,Black Solid Atlanta Push Body Harlow,Underwear body,Underwear
1,3681748607f3287d2c3a65e00bb5fb153de30e9becf158...,663713001,2018-09-20,Black Solid Atlanta Push Body Harlow,Underwear body,Underwear
2,4ef5967ff17bf474bffebe5b16bd54878e1d4105f7b4ed...,663713001,2018-09-20,Black Solid Atlanta Push Body Harlow,Underwear body,Underwear
3,6b7b10d2d47516c82a6f97332478dab748070f09693f09...,663713001,2018-09-20,Black Solid Atlanta Push Body Harlow,Underwear body,Underwear
4,8ac137752bbe914aa4ae6ad007a9a0c5b67a1ab2b2d474...,663713001,2018-09-20,Black Solid Atlanta Push Body Harlow,Underwear body,Underwear


In [8]:
# group articles bought by the same person on the same day to obtain orders
article_name_df=transactions_merged2.groupby(["t_dat", "customer_id"],as_index=False)["article_name"].agg(','.join)
article_name_df["article_name"]= article_name_df["article_name"].str.split(",", n = 100, expand = False)

In [11]:
# convert orders into array for apriori algorithm
apriori_array = article_name_df["article_name"].to_numpy()

In [12]:
#Apriori report 
report_colnames = {
    'a': 'support_itemset_absolute',
    's': 'support_itemset_relative',
    'S': 'support_itemset_relative_pct',
    'b': 'support_bodyset_absolute',
    'x': 'support_bodyset_relative',
    'X': 'support_bodyset_relative_pct',
    'h': 'support_headitem_absolute',
    'y': 'support_headitem_relative',
    'Y': 'support_headitem_relative_pct',
    'c': 'confidence',
    'C': 'confidence_pct',
    'l': 'lift',
    'L': 'lift_pct',
    'e': 'evaluation',
    'E': 'evaluation_pct',
    'Q': 'xx',
    'S': 'support_emptyset',
    }

In [13]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = "asCl"
algo = "ap" # "fp" or "ap"

In [14]:
# uncomment either to check documentation for apriori or fpgrowth algorithms

#?arules
#?fpgrowth

In [15]:
# start apriori (or fpgrowth) store results in a result dataframe
if algo == "ap":
    result = arules(apriori_array, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(apriori_array, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(88954, 6)


In [24]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(10))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
0,Black Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS,)",7125,0.000785,18.541168,38.860014
1,Dark Blue Solid Jade HW Skinny Denim TRS,"(Black Solid Jade HW Skinny Denim TRS,)",7125,0.000785,16.44585,38.860014
67,Dark Green Other structure Simple as That Triangle Top,"(Dark Green Other structure Simple as that Cheeky Tanga,)",6982,0.000769,64.43337,601.178109
66,Dark Green Other structure Simple as that Cheeky Tanga,"(Dark Green Other structure Simple as That Triangle Top,)",6982,0.000769,71.742704,601.178109
7,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS,)",5954,0.000656,15.493911,43.203379
6,Dark Blue Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS,)",5954,0.000656,18.283995,43.203379
55,Red Other structure Simple as That Triangle Top,"(Red Other structure Simple as that Cheeky Tanga,)",5914,0.000651,60.445626,435.220918
56,Red Other structure Simple as that Cheeky Tanga,"(Red Other structure Simple as That Triangle Top,)",5914,0.000651,46.895567,435.220918
2,Black Solid Luna skinny RW,"(Blue Denim Luna skinny RW,)",5786,0.000637,16.796818,40.859999
3,Blue Denim Luna skinny RW,"(Black Solid Luna skinny RW,)",5786,0.000637,15.500844,40.859999


In [25]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(10))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
69325,Black Melange SPAM fancy shorts BB,"(Light Green Placement print Apollo casual s/s tee,)",5,5.506499e-07,5.376344,1097.037453
84902,Turquoise Solid Carp cut-out,"(Light Purple Stripe Sharkfin bottom,)",5,5.506499e-07,5.494505,6834.396357
69327,Light Grey Front print Apollo casual s/s tee,"(Light Green Placement print Apollo casual s/s tee,)",5,5.506499e-07,5.376344,3149.55914
69329,White Front print Beach price tanktop,"(Light Green Placement print Apollo casual s/s tee,)",5,5.506499e-07,5.376344,3755.24359
69360,White All over pattern HAPPY TEE,"(White All over pattern BOAZ PRICE L/S,)",5,5.506499e-07,5.0,2536.362849
69361,Grey Front print ANDY L/S,"(White All over pattern BOAZ PRICE L/S,)",5,5.506499e-07,5.0,3266.251439
69364,Greenish Khaki Front print ANDY FANCY L/S,"(White All over pattern BOAZ PRICE L/S,)",5,5.506499e-07,5.0,3067.628041
84938,Light Blue Denim SKINNY BASIC 89,"(Light Pink Check Lexi Shirt,)",5,5.506499e-07,5.050505,426.599906
84939,Greenish Khaki All over pattern Cleopatra tunic,"(Light Pink Check Lexi Shirt,)",5,5.506499e-07,5.050505,3886.397449
64880,Dark Green Solid Papi Chulo Top,"(White Solid Tom Denim Shorts,)",5,5.506499e-07,5.050505,227.027178


In [26]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 3 # minimum length
zmax = 3 # maximum length 
report = "asCl"
algo = "ap" # 'fp' or 'ap'

if algo == "ap":
    result = arules(apriori_array, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(apriori_array, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(97578, 6)


In [27]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(10))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
10,Black Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",1369,0.000151,22.992946,48.190394
12,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",1369,0.000151,19.214035,53.576612
11,Dark Blue Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",1369,0.000151,24.161666,57.09177
309,Dark Blue Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",818,9e-05,29.659173,70.081868
310,Dark Grey Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",818,9e-05,11.480702,71.859673
308,Black Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",818,9e-05,25.90247,54.2884
5504,Dark Green Solid The Low Line Highwaist,"(Beige All over pattern The Low Line Highwaist, Dark Green Solid Timeless Sports Top)",705,7.8e-05,90.384615,1367.163896
5503,Beige All over pattern The Low Line Highwaist,"(Dark Green Solid The Low Line Highwaist, Dark Green Solid Timeless Sports Top)",705,7.8e-05,15.28621,201.57061
5502,Dark Green Solid Timeless Sports Top,"(Dark Green Solid The Low Line Highwaist, Beige All over pattern The Low Line Highwaist)",705,7.8e-05,81.221198,1079.641367
5456,Dark Green Solid The Low Line Highwaist,"(Dark Green Solid Timeless Sports Top, Beige All over pattern Timeless Sports Top)",694,7.6e-05,71.768356,1085.573075


In [29]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(10))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
35613,Black Solid Chubba Chubb Highwaist brazili,"(White Solid Knot Bitter Top, Black All over pattern Knot Bitter Top)",5,5.506499e-07,5.050505,113.40131
66147,Yellowish Brown Solid 20 den 2p Tights,"(Black Solid Control Top 50 den 1p Tights, Black Solid 20 den 2p Tights)",5,5.506499e-07,5.208333,260.278475
87794,Dark Blue Melange Calista (1),"(Light Pink Melange Calista., Black Solid Calista cardigan.)",5,5.506499e-07,5.434783,189.075858
66144,Black Solid Lyra HW wide full(1),"(Black Solid Jennifer trouser, Black Solid Drizzle)",5,5.506499e-07,5.208333,380.777769
87800,Light Pink Solid Barry cardigan (1),"(Light Pink Melange Calista., Black Solid Calista cardigan.)",5,5.506499e-07,5.434783,386.746073
66123,Light Beige Melange Son V-neck,"(Light Beige Neps Flock, Dark Blue Melange Flock)",5,5.506499e-07,5.0,92.71165
66121,Black Solid Harrison short sleeve top CN,"(Light Beige Neps Flock, Dark Blue Melange Flock)",5,5.506499e-07,5.0,39.081428
66116,Light Beige Neps Flock,"(Black Solid Son V-neck, Dark Blue Solid Flock (1))",5,5.506499e-07,5.434783,367.451965
66115,Black Solid Son V-neck,"(Light Beige Neps Flock, Dark Blue Solid Flock (1))",5,5.506499e-07,5.263158,86.954905
97577,Dark Turquoise All over pattern 1 pk Sneaker,"(Dark Green All over pattern 1pk Smart, Blue All over pattern 1pk Fun)",5,5.506499e-07,5.434783,16449.599638


Analysis by product type granularity

In [36]:
prod_type_df=transactions_merged2.groupby(["t_dat", "customer_id"],as_index=False)["product_type_name"].agg(','.join)
prod_type_df["product_type_name"]= prod_type_df["product_type_name"].str.split(",", n = 100, expand = False)

In [43]:
len(prod_type_df)

9080179

In [37]:
apriori_array = prod_type_df["product_type_name"].to_numpy()

In [38]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = "asCl"
algo = "ap" # 'fp' or 'ap'


# run apriori
if algo == "ap":
    result = arules(apriori_array, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(apriori_array, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(1840, 6)


In [42]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
2,Trousers,"(Sweater,)",498339,0.054882,29.472831,1.163248
3,Sweater,"(Trousers,)",498339,0.054882,21.66113,1.163248
146,Swimwear bottom,"(Bikini top,)",463018,0.050992,78.190971,12.109265
147,Bikini top,"(Swimwear bottom,)",463018,0.050992,78.970456,12.109265
1,Dress,"(Trousers,)",385073,0.042408,16.737836,0.867179
0,Trousers,"(Dress,)",385073,0.042408,21.971428,0.867179
6,Trousers,"(T-shirt,)",366951,0.040412,29.21196,1.152952
7,T-shirt,"(Trousers,)",366951,0.040412,15.950133,1.152952
12,Trousers,"(Top,)",339933,0.037437,30.821743,1.216488
13,Top,"(Trousers,)",339933,0.037437,14.775751,1.216488


In [41]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
1831,Shorts,"(Keychain,)",8,8.810399e-07,6.666667,0.96758
1778,Blazer,"(Sewing kit,)",8,8.810399e-07,7.619048,1.984547
1835,Hoodie,"(Keychain,)",8,8.810399e-07,6.666667,1.646651
1800,Sandals,"(Marker pen,)",8,8.810399e-07,6.299213,6.070297
1809,Underwear bottom,"(Dog wear,)",8,8.810399e-07,5.063291,0.728212
1815,Dress,"(Headband,)",8,8.810399e-07,5.228758,0.270899
1753,Blouse,"(Baby Bib,)",8,8.810399e-07,5.333333,0.49112
1759,Hoodie,"(Baby Bib,)",8,8.810399e-07,5.333333,1.317321
1830,Vest top,"(Keychain,)",7,7.709099e-07,5.833333,0.637069
1837,Belt,"(Keychain,)",7,7.709099e-07,5.833333,3.166051


In [47]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 3 # minimum length
zmax = 3 # maximum length 
report = "asCl"
algo = "ap" # 'fp' or 'ap'


# run apriori
if algo == "ap":
    result = arules(apriori_array, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(apriori_array, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(66018, 6)


In [48]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
17,Top,"(Sweater, Trousers)",110141,0.01213,22.101622,1.819627
16,Sweater,"(Top, Trousers)",110141,0.01213,32.400797,1.739991
15,Trousers,"(Top, Sweater)",110141,0.01213,40.467128,1.597177
6,Trousers,"(T-shirt, Sweater)",103697,0.01142,40.571301,1.601289
7,Sweater,"(T-shirt, Trousers)",103697,0.01142,28.259086,1.517573
8,T-shirt,"(Sweater, Trousers)",103697,0.01142,20.808526,1.50414
23,Top,"(T-shirt, Trousers)",99154,0.01092,27.021046,2.224644
22,T-shirt,"(Top, Trousers)",99154,0.01092,29.168689,2.108453
21,Trousers,"(Top, T-shirt)",99154,0.01092,38.088251,1.503286
13,Dress,"(Top, Trousers)",96422,0.010619,28.365001,1.469577


In [49]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
62605,Cardigan,"(Backpack, Leggings/Tights)",5,5.506499e-07,5.319149,1.691017
51620,Pyjama bottom,"(Underdress, Other accessories)",5,5.506499e-07,5.0,6.017747
38566,Earring,"(Other shoe, Robe)",5,5.506499e-07,5.494505,3.43078
38564,Belt,"(Other shoe, Robe)",5,5.506499e-07,5.494505,2.982151
41785,Ballerinas,"(Slippers, Hair clip)",5,5.506499e-07,5.208333,12.499696
25114,Sandals,"(Hair ties, Ballerinas)",5,5.506499e-07,5.319149,5.125849
41784,Pyjama bottom,"(Slippers, Hair clip)",5,5.506499e-07,5.208333,6.268487
50770,Pyjama set,"(Costumes, Pyjama bottom)",5,5.506499e-07,5.376344,3.308069
43316,Necklace,"(Flip flop, Hair ties)",5,5.506499e-07,5.154639,4.220701
52218,Sunglasses,"(Kids Underwear top, Swimsuit)",5,5.506499e-07,5.154639,5.088279


## Analysis on age segment 19-26

In [44]:
article_name_age = pd.merge(article_name_df, customers[["customer_id", "age"]], on="customer_id")
prod_type_age = pd.merge(prod_type_df, customers[["customer_id", "age"]], on="customer_id")

In [45]:
article_name_young = article_name_age[ (article_name_age["age"] > 18) &
                     (article_name_age["age"] < 27) ]
prod_type_young = prod_type_age[ (prod_type_age["age"] > 18) &
                     (prod_type_age["age"] < 27) ]

In [46]:
array_young = article_name_young["article_name"].to_numpy()
array_type_young = prod_type_young["product_type_name"].to_numpy()

In [81]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = "asCl"
algo = 'ap' # 'fp' or 'ap'
# run apriori
if algo == "ap":
    result = arules(array_young, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(array_young, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(108996, 6)


In [51]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
0,Black Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS,)",3081,0.001072,19.51853,28.450174
1,Dark Blue Solid Jade HW Skinny Denim TRS,"(Black Solid Jade HW Skinny Denim TRS,)",3081,0.001072,15.628487,28.450174
6,Black Solid Tilda tank,"(White Solid Tilda tank,)",3066,0.001067,35.428703,77.600959
7,White Solid Tilda tank,"(Black Solid Tilda tank,)",3066,0.001067,23.370684,77.600959
41,Dark Green Other structure Simple as That Triangle Top,"(Dark Green Other structure Simple as that Cheeky Tanga,)",2980,0.001037,66.34016,448.644499
40,Dark Green Other structure Simple as that Cheeky Tanga,"(Dark Green Other structure Simple as That Triangle Top,)",2980,0.001037,70.134149,448.644499
49,Red Other structure Simple as That Triangle Top,"(Red Other structure Simple as that Cheeky Tanga,)",2794,0.000972,66.161497,327.390487
50,Red Other structure Simple as that Cheeky Tanga,"(Red Other structure Simple as That Triangle Top,)",2794,0.000972,48.114345,327.390487
3,Light Blue Solid Jade HW Skinny Denim TRS,"(Black Solid Jade HW Skinny Denim TRS,)",2617,0.000911,13.27483,26.449413
2,Black Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS,)",2617,0.000911,18.145888,26.449413


In [52]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
83239,Black Other structure FF Begonia shirt,"(Gold Solid Cool Dylan hoop pk,)",2,6.960131e-07,5.0,316.465749
83240,Gold Solid Flirty Tara necklace,"(Gold Solid Cool Dylan hoop pk,)",2,6.960131e-07,5.0,440.722239
83226,Dark Green Melange CONAN STRUCTURE SWEATER,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,2016.497544
83225,Beige Melange MUSHROOM SLIM FIT POLO,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,1651.441954
59838,Beige Solid Bird tee,"(Dark Grey Solid Linnea linen t-shirt,)",2,6.960131e-07,5.405405,283.957607
59839,White Solid ESSENTIAL LOVA LINEN,"(Dark Grey Solid Linnea linen t-shirt,)",2,6.960131e-07,5.405405,400.321677
83222,Dark Blue Check Jamie Skinny Cropped Trs Pid,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,843.908664
83221,Light Blue Melange MUSHROOM SLIM FIT POLO,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,982.396239
83220,Black Neps PETER POLO,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,899.376839
83219,Black All over pattern BORIS SS,"(Dark Blue Other structure PORTOBELLO SS MUSCLE POLO,)",2,6.960131e-07,6.666667,676.916137


In [53]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 3 # minimum length
zmax = 3 # maximum length 
report = "asCl"
algo = 'ap' # 'fp' or 'ap'
# run apriori
if algo == "ap":
    result = arules(array_young, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(array_young, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(176784, 6)


In [54]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
0,Black Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",609,0.000212,24.516908,35.7358
2,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",609,0.000212,19.76631,39.383351
1,Dark Blue Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",609,0.000212,23.270921,42.362496
4365,Grey Melange Tilda tank,"(White Solid Tilda tank, Black Solid Tilda tank)",367,0.000128,11.969993,125.532423
4363,Black Solid Tilda tank,"(Grey Melange Tilda tank, White Solid Tilda tank)",367,0.000128,47.600519,104.261391
4364,White Solid Tilda tank,"(Grey Melange Tilda tank, Black Solid Tilda tank)",367,0.000128,40.374037,134.059579
62399,Off White Front print LS Birdie T-Shirt,"(Greenish Khaki All over pattern LS Reggipetto Tie Tanga, Greenish Khaki All over pattern LS Reggipetto Triangle Top)",346,0.00012,20.768307,490.369088
62397,Greenish Khaki All over pattern LS Reggipetto Triangle Top,"(Off White Front print LS Birdie T-Shirt, Greenish Khaki All over pattern LS Reggipetto Tie Tanga)",346,0.00012,91.777188,1181.02363
62398,Greenish Khaki All over pattern LS Reggipetto Tie Tanga,"(Off White Front print LS Birdie T-Shirt, Greenish Khaki All over pattern LS Reggipetto Triangle Top)",346,0.00012,84.596577,1195.715816
420,Dark Grey Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",317,0.00011,10.288867,53.203442


In [55]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
104994,Black Solid Twenty HW tapered,"(Black Dot Pluto RW slacks (1), Black All over pattern Pluto RW slacks (1))",2,6.960131e-07,5.714286,56.699073
104995,White Solid Mona,"(Black Dot Pluto RW slacks (1), Black All over pattern Pluto RW slacks (1))",2,6.960131e-07,5.714286,83.223778
105026,Dark Pink Solid Queen Sweater,"(Red Melange SIRPA, Dark Red Melange SIRPA)",2,6.960131e-07,6.25,278.873156
105023,Yellow Melange SIRPA,"(Red Melange SIRPA, Black Solid SIRPA)",2,6.960131e-07,6.666667,189.670561
105022,Off White Solid Tara turtleneck top,"(Red Melange SIRPA, Black Solid SIRPA)",2,6.960131e-07,6.666667,204.011999
105021,Black Solid Techno,"(Red Melange SIRPA, Black Solid SIRPA)",2,6.960131e-07,6.666667,166.72521
105020,Black Solid Jodi skirt,"(Red Melange SIRPA, Dark Red Melange SIRPA)",2,6.960131e-07,6.25,152.327661
105019,Black Solid JESSY SKIRT,"(Red Melange SIRPA, Dark Red Melange SIRPA)",2,6.960131e-07,6.25,117.535545
105016,Dark Blue Solid Jade Denim TRS,"(Red Melange SIRPA, Dark Red Melange SIRPA)",2,6.960131e-07,6.25,53.578256
105015,Light Blue Solid Jade Denim TRS,"(Red Melange SIRPA, Black Solid SIRPA)",2,6.960131e-07,6.666667,43.165225


In [79]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = "asCl"
algo = "ap" # 'fp' or 'ap'
# run apriori
if algo == "ap":
    result = arules(array_type_young, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(array_type_young, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(1644, 6)


In [80]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
85,Swimwear bottom,"(Bikini top,)",186234,0.064811,77.632111,9.645595
84,Bikini top,"(Swimwear bottom,)",186234,0.064811,80.525613,9.645595
2,Trousers,"(Sweater,)",135896,0.047293,26.65218,1.191212
3,Sweater,"(Trousers,)",135896,0.047293,21.137344,1.191212
6,Trousers,"(T-shirt,)",100136,0.034848,25.914711,1.158251
7,T-shirt,"(Trousers,)",100136,0.034848,15.575212,1.158251
0,Trousers,"(Dress,)",98516,0.034284,20.086531,0.897762
1,Dress,"(Trousers,)",98516,0.034284,15.323237,0.897762
140,Bra,"(Underwear bottom,)",96948,0.033739,44.994152,4.342168
141,Underwear bottom,"(Bra,)",96948,0.033739,32.559436,4.342168


In [68]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
1576,Bag,"(Sewing kit,)",4,1e-06,7.54717,3.19653
1577,Belt,"(Sewing kit,)",3,1e-06,5.660377,2.486341
1639,Top,"(Dog wear,)",3,1e-06,6.666667,0.580182
1640,Blouse,"(Dog wear,)",3,1e-06,6.666667,0.695933
1641,Leggings/Tights,"(Dog wear,)",3,1e-06,6.666667,1.340888
1574,Blazer,"(Sewing kit,)",3,1e-06,5.660377,1.472985
1635,Bracelet,"(Keychain,)",3,1e-06,6.521739,62.260053
1575,Socks,"(Sewing kit,)",3,1e-06,5.660377,1.754865
1609,Slippers,"(Sleeping sack,)",3,1e-06,6.818182,40.67284
1578,Earring,"(Sewing kit,)",3,1e-06,5.660377,2.916259


## Analysis on age segment 46-54

In [60]:
article_name_old = article_name_age[ (article_name_age['age'] > 46) &
                     (article_name_age["age"] < 54) ]
prod_type_old = prod_type_age[ (prod_type_age['age'] > 46) &
                     (prod_type_age["age"] < 54) ]

In [62]:
array_old = article_name_old["article_name"].to_numpy()
array_type_old = prod_type_old["product_type_name"].to_numpy()

In [69]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = "asCl"
algo = 'ap' # 'fp' or 'ap'
# run apriori
if algo == "ap":
    result = arules(array_old, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == "fp":
    result = fpgrowth(array_old, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == "ap":
    colnames = ["consequent", "antecedent"] + [report_colnames.get(k, k) for k in list(report)]
elif algo == "fp":
    colnames = ["itemset"] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values("support_itemset_absolute", ascending=False)
print(df_result.shape)

(678542, 6)


In [70]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
2,Black Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS,)",993,0.000745,19.129262,45.189135
3,Dark Blue Solid Jade HW Skinny Denim TRS,"(Black Solid Jade HW Skinny Denim TRS,)",993,0.000745,17.600142,45.189135
75,Dark Green Other structure Simple as that Cheeky Tanga,"(Dark Green Other structure Simple as That Triangle Top,)",929,0.000697,72.352025,665.964413
76,Dark Green Other structure Simple as That Triangle Top,"(Dark Green Other structure Simple as that Cheeky Tanga,)",929,0.000697,64.157459,665.964413
4,Black Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS,)",869,0.000652,18.187526,42.964468
5,Light Blue Solid Jade HW Skinny Denim TRS,"(Black Solid Jade HW Skinny Denim TRS,)",869,0.000652,15.40234,42.964468
6,Dark Blue Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS,)",854,0.000641,17.873587,45.891219
7,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS,)",854,0.000641,16.451551,45.891219
0,Black Solid Luna skinny RW,"(Blue Denim Luna skinny RW,)",826,0.00062,15.688509,37.412476
1,Blue Denim Luna skinny RW,"(Black Solid Luna skinny RW,)",826,0.00062,14.77903,37.412476


In [71]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
322326,Gold Solid Flirty Lava hoop pk,"(Dark Blue All over pattern Dahlia wrap top,)",1,7.502934e-07,5.263158,3188.545455
322325,Gold Solid Flirty Lava hoop pk,"(Green Solid Dixie crossbag,)",1,7.502934e-07,6.666667,4038.824242
322322,Black Solid SPEED OL Cyckel Sheer shorts,"(Orange Solid Karin headband,)",1,7.502934e-07,6.25,4384.25
322321,Orange Solid Karin headband,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,4384.25
322319,Other Yellow Solid Kravitz body,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,2262.83871
322305,Black Denim Skinny H.W Ankle Festive,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,1252.642857
322318,Black Stripe CLARA TEE,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,2418.896552
322317,Light Orange All over pattern Flirty crossbag,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,2004.228571
322316,Black Solid KIMCHI LS,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,3049.913043
322315,Red Solid KIMCHI LS,"(Black Solid SPEED OL Cyckel Sheer shorts,)",1,7.502934e-07,5.263158,2418.896552


In [72]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 3 # minimum length
zmax = 3 # maximum length 
report = 'asCl'
algo = 'ap' # 'fp' or 'ap'
#  run apriori
if algo == 'ap':
    result = arules(array_old, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == 'fp':
    result = fpgrowth(array_old, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == 'ap':
    colnames = ['consequent', 'antecedent'] + [report_colnames.get(k, k) for k in list(report)]
elif algo == 'fp':
    colnames = ['itemset'] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values('support_itemset_absolute', ascending=False)
print(df_result.shape)

(630048, 6)


In [73]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
18,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",200,0.00015,20.140987,56.182815
17,Dark Blue Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",200,0.00015,23.01496,59.091918
16,Black Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",200,0.00015,23.419204,55.323282
530,Dark Grey Solid Jade HW Skinny Denim TRS,"(Dark Blue Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",119,8.9e-05,11.983887,76.642364
529,Dark Blue Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Black Solid Jade HW Skinny Denim TRS)",119,8.9e-05,30.357143,77.943295
528,Black Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",119,8.9e-05,25.265393,59.684542
542,Dark Grey Solid Jade HW Skinny Denim TRS,"(Light Blue Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",115,8.6e-05,13.466042,86.121414
540,Dark Blue Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Light Blue Solid Jade HW Skinny Denim TRS)",115,8.6e-05,31.944444,82.018761
541,Light Blue Solid Jade HW Skinny Denim TRS,"(Dark Grey Solid Jade HW Skinny Denim TRS, Dark Blue Solid Jade HW Skinny Denim TRS)",115,8.6e-05,24.416136,68.108244
65247,Light Beige All over pattern LS Olivia Triangle Top,"(Greenish Khaki All over pattern LS Reggipetto Triangle Top, Light Beige All over pattern LS Olivia Cheeky Tanga)",107,8e-05,91.452991,1878.114706


In [74]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
251237,White Placement print BOULEVARD TEE TVP,"(Black All over pattern Stella Pull on TRS, Blue All over pattern Julie (1))",1,7.502934e-07,6.666667,84.062567
251236,Black Solid CHARLIE SKIRT,"(Black All over pattern Stella Pull on TRS, Blue All over pattern Julie (1))",1,7.502934e-07,6.666667,48.474704
251235,Black Contrast Heavy jsy long leg,"(Black All over pattern Stella Pull on TRS, Blue All over pattern Julie (1))",1,7.502934e-07,6.666667,34.640988
251234,Black Solid Tilly (1),"(Black All over pattern Stella Pull on TRS, Blue All over pattern Julie (1))",1,7.502934e-07,6.666667,29.189926
251233,Black Solid Luna skinny RW,"(Black All over pattern Stella Pull on TRS, Blue All over pattern Julie (1))",1,7.502934e-07,6.666667,15.898038
251232,Off White All over pattern Tiblisi paperwaist tapered,"(Black All over pattern Stella Pull on TRS, Black All over pattern Victoria Pull- On TRS)",1,7.502934e-07,5.555556,185.57672
251230,Grey Solid Ida Panel Jogger,"(Black All over pattern Stella Pull on TRS, Black Solid Daiquiri Pull- On TRS)",1,7.502934e-07,6.666667,190.674106
251229,Blue Denim Skinny H.W Ankle Queens,"(Black All over pattern Stella Pull on TRS, Black Solid Daiquiri Pull- On TRS)",1,7.502934e-07,6.666667,158.668095
251228,Dark Blue Denim Skinny H.W Ankle Queens,"(Black All over pattern Stella Pull on TRS, Black Solid Daiquiri Pull- On TRS)",1,7.502934e-07,6.666667,154.798142
251227,Dark Red Solid Siri Cardigan,"(Black All over pattern Stella Pull on TRS, Black Solid Daiquiri Pull- On TRS)",1,7.502934e-07,6.666667,197.893393


In [75]:
# parameters for apriori or fpgrowth algorithms
supp = 0.001 
conf = 5 
zmin = 2 # minimum length
zmax = 2 # maximum length 
report = 'asCl'
algo = 'ap' # 'fp' or 'ap'

# run apriori
if algo == 'ap':
    result = arules(array_type_old, zmin=zmin, zmax=zmax, supp=supp, conf=conf, report=report)
elif algo == 'fp':
    result = fpgrowth(array_type_old, zmin=zmin, zmax=zmax, supp=supp, report=report)

if algo == 'ap':
    colnames = ['consequent', 'antecedent'] + [report_colnames.get(k, k) for k in list(report)]
elif algo == 'fp':
    colnames = ['itemset'] + [report_colnames.get(k, k) for k in list(report)]
df_result = pd.DataFrame(result, columns=colnames)
df_result = df_result.sort_values('support_itemset_absolute', ascending=False)
print(df_result.shape)

(1711, 6)


In [77]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.head(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
0,Trousers,"(Sweater,)",77316,0.05801,30.332966,1.113474
1,Sweater,"(Trousers,)",77316,0.05801,21.294422,1.113474
153,Swimwear bottom,"(Bikini top,)",58501,0.043893,78.587069,13.959802
154,Bikini top,"(Swimwear bottom,)",58501,0.043893,77.969106,13.959802
3,Dress,"(Trousers,)",54380,0.040801,14.977374,0.775613
2,Trousers,"(Dress,)",54380,0.040801,21.129032,0.775613
6,Trousers,"(T-shirt,)",53242,0.039947,29.815592,1.094482
7,T-shirt,"(Trousers,)",53242,0.039947,14.663946,1.094482
20,Trousers,"(Blouse,)",49167,0.03689,31.165102,1.144021
21,Blouse,"(Trousers,)",49167,0.03689,13.541606,1.144021


In [78]:
with pd.option_context('display.max_colwidth', 400):
    display(df_result.tail(20))

Unnamed: 0,consequent,antecedent,support_itemset_absolute,support_itemset_relative,confidence_pct,lift
1666,Hat/beanie,"(Headband,)",1,7.502934e-07,6.666667,11.194927
1688,Hair/alice band,"(Sleeping sack,)",1,7.502934e-07,7.142857,12.795814
1689,Dungarees,"(Sleeping sack,)",1,7.502934e-07,7.142857,24.778984
1686,Underwear Tights,"(Sleeping sack,)",1,7.502934e-07,7.142857,7.402866
1664,Other accessories,"(Headband,)",1,7.502934e-07,6.666667,8.797439
1663,Coat,"(Headband,)",1,7.502934e-07,6.666667,6.170854
1660,Bag,"(Headband,)",1,7.502934e-07,6.666667,3.828929
1672,Dress,"(Sleeping sack,)",1,7.502934e-07,7.142857,0.369897
1659,Cardigan,"(Headband,)",1,7.502934e-07,6.666667,2.006915
1674,Skirt,"(Sleeping sack,)",1,7.502934e-07,7.142857,1.145756
