In [5]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [7]:
toy_dataset = [['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],
                ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat'],
                ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],
                ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],
                ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants']]

In [9]:
df = pd.DataFrame(toy_dataset)
df

Unnamed: 0,0,1,2,3,4,5,6
0,Skirt,Sneakers,Scarf,Pants,Hat,,
1,Sunglasses,Skirt,Sneakers,Pants,Hat,,
2,Dress,Sandals,Scarf,Pants,Heels,,
3,Dress,Necklace,Earrings,Scarf,Hat,Heels,Hat
4,Earrings,Skirt,Skirt,Scarf,Shirt,Pants,


In [11]:
item = df.apply(lambda x: [', '.join(x.dropna().astype(str))], axis=1)

# Display the items
print(item)

0                 [Skirt, Sneakers, Scarf, Pants, Hat]
1            [Sunglasses, Skirt, Sneakers, Pants, Hat]
2                [Dress, Sandals, Scarf, Pants, Heels]
3    [Dress, Necklace, Earrings, Scarf, Hat, Heels,...
4        [Earrings, Skirt, Skirt, Scarf, Shirt, Pants]
dtype: object


In [13]:
# Use TransactionEncoder to convert the list of transactions into a one-hot encoded DataFrame
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_data = te.fit(item).transform(item)

# Display the output
te_data

array([[False, False, False,  True, False],
       [False, False, False, False,  True],
       [False,  True, False, False, False],
       [ True, False, False, False, False],
       [False, False,  True, False, False]])

In [15]:
# Assuming te_data is your transactional data
te_data = [
    ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],
    ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],
    ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants'],
    ['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],
    ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat']
]

# Get unique items
items = set()
for transaction in te_data:
    for item in transaction:
        items.add(item)

# Create a DataFrame with one-hot encoding
onehot_df = pd.DataFrame(index=range(len(te_data)), columns=sorted(items))

# Populate the DataFrame
for i, transaction in enumerate(te_data):
    for item in transaction:
        onehot_df.loc[i, item] = True

# Replace NaN with False
onehot_df.fillna(False, inplace=True)

onehot_df

  onehot_df.fillna(False, inplace=True)


Unnamed: 0,Dress,Earrings,Hat,Heels,Necklace,Pants,Sandals,Scarf,Shirt,Skirt,Sneakers,Sunglasses
0,True,True,True,True,True,False,False,True,False,False,False,False
1,True,False,False,True,False,True,True,True,False,False,False,False
2,False,True,False,False,False,True,False,True,True,True,False,False
3,False,False,True,False,False,True,False,True,False,True,True,False
4,False,False,True,False,False,True,False,False,False,True,True,True


In [17]:
# We'll set a `min_support` value to control the threshold of how frequent an itemset must be to be considered.

frequent_itemsets = apriori(onehot_df, min_support=0.2, use_colnames=True)
# 'min_support=0.2' means that we are only interested in itemsets that appear in at least 20% of the transactions.

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.4,(Dress)
1,0.4,(Earrings)
2,0.6,(Hat)
3,0.4,(Heels)
4,0.2,(Necklace)
...,...,...
146,0.2,"(Heels, Earrings, Necklace, Hat, Scarf)"
147,0.2,"(Skirt, Earrings, Pants, Shirt, Scarf)"
148,0.2,"(Skirt, Sneakers, Hat, Pants, Scarf)"
149,0.2,"(Skirt, Sneakers, Sunglasses, Hat, Pants)"


In [19]:
# Generate Association Rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
# We use 'lift' as the metric to determine how much more likely two items are to be purchased together compared to random chance.
# We use 'min_threshold' to filter the rules to only include those with a lift greater than 1.

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Dress),(Earrings),0.4,0.4,0.2,0.500000,1.250000,1.0,0.04,1.200000,0.333333,0.333333,0.166667,0.500000
1,(Earrings),(Dress),0.4,0.4,0.2,0.500000,1.250000,1.0,0.04,1.200000,0.333333,0.333333,0.166667,0.500000
2,(Dress),(Heels),0.4,0.4,0.4,1.000000,2.500000,1.0,0.24,inf,1.000000,1.000000,1.000000,1.000000
3,(Heels),(Dress),0.4,0.4,0.4,1.000000,2.500000,1.0,0.24,inf,1.000000,1.000000,1.000000,1.000000
4,(Dress),(Necklace),0.4,0.2,0.2,0.500000,2.500000,1.0,0.12,1.600000,1.000000,0.500000,0.375000,0.750000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1135,(Earrings),"(Heels, Necklace, Hat, Dress, Scarf)",0.4,0.2,0.2,0.500000,2.500000,1.0,0.12,1.600000,1.000000,0.500000,0.375000,0.750000
1136,(Necklace),"(Heels, Earrings, Hat, Dress, Scarf)",0.2,0.2,0.2,1.000000,5.000000,1.0,0.16,inf,1.000000,1.000000,1.000000,1.000000
1137,(Hat),"(Heels, Earrings, Necklace, Dress, Scarf)",0.6,0.2,0.2,0.333333,1.666667,1.0,0.08,1.200000,1.000000,0.333333,0.166667,0.666667
1138,(Dress),"(Heels, Earrings, Necklace, Hat, Scarf)",0.4,0.2,0.2,0.500000,2.500000,1.0,0.12,1.600000,1.000000,0.500000,0.375000,0.750000
