In [1]:
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import pickle
import random

In [2]:
def recommend_items(input_items, rules, top_n=5, metric='confidence'):
    recommendations = {}
    for i , (antecedent, consequent, antecedent_support, consequent_support, support, confidence, lift, leverage, conviction, zhangs_metric) in rules.iterrows():
        if antecedent.issubset(input_items) and not consequent.issubset(input_items):
            for item in consequent:
                if item not in input_items:
                    if item not in recommendations:
                        recommendations[item] = []
                    recommendations[item].append((confidence, support, lift, leverage, conviction, zhangs_metric))
    # Average the confidence and support values for the same item
    recommendations = {
        item: (
            sum(conf for conf, _, _, _, _, _ in item_rules) / len(item_rules), 
            sum(sup for _, sup, _, _, _, _ in item_rules) / len(item_rules), 
            sum(lift for _, _, lift, _, _, _ in item_rules) / len(item_rules), 
            sum(leverage for _, _, _, leverage, _, _ in item_rules) / len(item_rules),
            sum(conviction for _, _, _, _, conviction, _ in item_rules) / len(item_rules), 
            sum(zhangs_metric for _, _, _, _, _, zhangs_metric in item_rules) / len(item_rules)
                )
        for item, item_rules in recommendations.items()
    }
    
    if metric == 'confidence':
        # Sort the recommendations by decreasing confidence and support
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][0], -x[1][1]))
    elif metric == 'support':
        # Sort the recommendations by decreasing support and confidence
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][1], -x[1][0]))
    elif metric == 'lift':
        # Sort the recommendations by decreasing lift and confidence
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][2], -x[1][0]))
    elif metric == 'leverage':
        # Sort the recommendations by decreasing leverage and confidence
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][3], -x[1][0]))
    elif metric == 'conviction':
        # Sort the recommendations by decreasing conviction and confidence
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][4], -x[1][0]))
    elif metric == 'zhangs_metric':
        # Sort the recommendations by decreasing Zhang's metric and confidence
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: (-x[1][5], -x[1][0]))
    else:
        raise ValueError('Invalid metric: {}'.format(metric))

    return [item for item, _ in sorted_recommendations[:top_n]]


def evaluate_recommendations(test_data, user_items, rules, top_n, metric='confidence'):
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    for user, true_items in tqdm(test_data.items()):
        # Assuming user_items is a dictionary with user IDs as keys and their associated items as values
        input_items = user_items[user]
        # Get recommendations for the user
        recommended_items = set(recommend_items(input_items, rules, top_n=top_n, metric=metric))
        true_items = set(true_items)
        true_positives += len(recommended_items.intersection(true_items))
        false_positives += len(recommended_items - true_items)
        false_negatives += len(true_items - recommended_items)
    # Calculate precision, recall, and F1 score
    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1_score

In [4]:
# All baskets
with open('baskets.pkl', 'rb') as f:
    baskets = pickle.load(f)

# All baskets user has ever made
with open('baskets_by_user.pkl', 'rb') as f:
    user_baskets = pickle.load(f)

# All items user has ever bought
with open('items_by_user.pkl', 'rb') as f:
    user_items = pickle.load(f)

In [5]:
# Encode baskets
te = TransactionEncoder()
te_ary = te.fit(baskets).transform(baskets)
df = pd.DataFrame(te_ary, columns=te.columns_)
df = df.reindex(sorted(df.columns), axis=1)  # sort columns by value
df

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14958,False,False,False,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False
14959,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14960,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14961,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [6]:
def create_test_data(baskets, test_size=0.1):
    '''
    Create test data from the given baskets - drop items from the baskets and add them to the test data - see how well the model can predict them

        :param user_items: dictionary with user IDs as keys and their associated items as values
        :param test_size: size of the test data

    :return: 
        train_data - dictionary with transaction IDs as keys and their associated items as values 
        test_data - dictionary with transaction IDs as keys and their associated items as values - for testing the model (items that were left in the basket)
        test_labels - dictionary with transaction IDs as keys and their associated items as values - for testing the model (items that were dropped from the basket)
    '''

    train_data = {basket_id : items for basket_id, items in enumerate(baskets) if np.random.rand() > test_size}
    test_labels = {}
    test_data = {}

    for id, basket in enumerate(baskets):
        if id not in train_data:
            n = len(basket)
            basket = list(basket)
            if n > 1:
                # n_to_drop = np.random.randint(1, n // 2 + 1)  # drop at least one item, but not more than half of the items
                n_to_drop = 1
                items_to_drop = np.random.choice(basket, n_to_drop, replace=False)  # choose items to drop
                basket = set(basket) - set(items_to_drop)  # remove items from the basket
                test_labels[id] = set(items_to_drop)  # add items to the test data
                test_data[id] = basket  # update user items
    
    return train_data, test_data, test_labels


def encode_baskets(baskets_list):
    '''
    Encode baskets

    :param baskets: list of baskets

    :return: encoded baskets
    '''
    te = TransactionEncoder()
    te_ary = te.fit(baskets_list).transform(baskets_list)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    df = df.reindex(sorted(df.columns), axis=1)  # sort columns by value
    return df

In [7]:
train, test, test_labels = create_test_data(baskets, test_size=0.1)

In [8]:
len(train), len(test), len(test_labels), test.keys() == test_labels.keys()

(13502, 1461, 1461, True)

In [17]:
with open('train_set.pkl', 'wb') as f:
    pickle.dump(train, f)

with open('test_set.pkl', 'wb') as f:
    pickle.dump(test, f)

with open('test_labels.pkl', 'wb') as f:
    pickle.dump(test_labels, f)

In [10]:
def play_with(frequent_itemsets, metrics, test_data, test_labels, top_n=5):
    '''
    Play with different metrics and thresholds for the creation of rules

    :param frequent_itemsets: frequent itemsets 
    :param methods: dictionary of methods to use for the creation of rules mapped to their thresholds

    :return: dataframe of methods, thresholds, precision, recall, f1_score
    '''
    results = []
    if type(top_n) == list:
        for n in top_n:
            for metric, threshold_list in metrics.items():
                loop = tqdm(threshold_list)
                for threshold in loop:
                    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=threshold)
                    loop.set_description(f'Method: {metric}, Threshold: {threshold:.4f}, N_rules: {len(rules)}')
                    precision, recall, f1_score = evaluate_recommendations(test_data, test_labels, rules, top_n=n, metric=metric)
                    results.append((metric, threshold, n, precision, recall, f1_score))
        return pd.DataFrame(results, columns=['method', 'threshold', 'top_n', 'precision', 'recall', 'f1_score'])
    else:
        for metric, threshold_list in metrics.items():
            loop = tqdm(threshold_list)
            for threshold in loop:
                rules = association_rules(frequent_itemsets, metric=metric, min_threshold=threshold)
                loop.set_description(f'Method: {metric}, Threshold: {threshold:.4f}, N_rules: {len(rules)}')
                precision, recall, f1_score = evaluate_recommendations(test_data, test_labels, rules, top_n=top_n, metric=metric)
                results.append((metric, threshold, precision, recall, f1_score))
        return pd.DataFrame(results, columns=['method', 'threshold', 'precision', 'recall', 'f1_score'])

In [11]:
encoded_transactions = encode_baskets(list(train.values()))
freq_itemsets = apriori(encoded_transactions, min_support=0.0001, use_colnames=True)

methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}

results = play_with(freq_itemsets, methods_dict, test, test_labels)

100%|██████████| 1461/1461 [12:07<00:00,  2.01it/s]   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [01:10<00:00, 20.67it/s] 14%|█▍        | 1/7 [12:08<1:12:48, 728.16s/it] 
100%|██████████| 1461/1461 [00:26<00:00, 54.28it/s] 29%|██▊       | 2/7 [13:18<28:27, 341.42s/it]  
100%|██████████| 1461/1461 [00:01<00:00, 858.86it/s]%|████▎     | 3/7 [13:45<13:11, 197.82s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 4784.12it/s]|█████▋    | 4/7 [13:47<06:01, 120.40s/it]
100%|██████████| 1461/1461 [00:00<00:00, 261685.02it/s]█████▏  | 5/7 [13:47<02:34, 77.10s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 268331.14it/s]█████▏  | 5/7 [13:47<02:34, 77.10s/it]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [13:47<00:00, 118.27s/it]
100%|██████████| 1461/1461 [07:45<00:00,  3.14it/s]31:   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [04:39<00:00,  5.23it/s]95:  14%|█▍        | 1/7 [07:45<46:31, 465.19s/it]
100%|██████████| 1461/1461 [03:01<0

In [12]:
display(results.sort_values(by='f1_score', ascending=False).head(10))
display(results.sort_values(by='precision', ascending=False).head(10))
display(results.sort_values(by='recall', ascending=False).head(10))

Unnamed: 0,method,threshold,precision,recall,f1_score
8,confidence,0.05,0.062552,0.203245,0.095662
2,support,0.001,0.056607,0.159081,0.083501
3,support,0.005,0.067102,0.083371,0.074357
7,confidence,0.01,0.04677,0.153673,0.071714
9,confidence,0.1,0.074092,0.06895,0.071429
1,support,0.0005,0.045368,0.139703,0.068493
4,support,0.01,0.071429,0.030644,0.042889
42,zhangs_metric,1.0,0.027148,0.065795,0.038436
17,lift,10.0,0.025833,0.066697,0.037242
22,leverage,0.0002,0.023856,0.066246,0.035079


Unnamed: 0,method,threshold,precision,recall,f1_score
30,conviction,1.25,0.2,0.000451,0.000899
10,confidence,0.25,0.111111,0.000451,0.000898
9,confidence,0.1,0.074092,0.06895,0.071429
4,support,0.01,0.071429,0.030644,0.042889
3,support,0.005,0.067102,0.083371,0.074357
8,confidence,0.05,0.062552,0.203245,0.095662
29,conviction,1.1,0.058824,0.001352,0.002643
2,support,0.001,0.056607,0.159081,0.083501
7,confidence,0.01,0.04677,0.153673,0.071714
1,support,0.0005,0.045368,0.139703,0.068493


Unnamed: 0,method,threshold,precision,recall,f1_score
8,confidence,0.05,0.062552,0.203245,0.095662
2,support,0.001,0.056607,0.159081,0.083501
7,confidence,0.01,0.04677,0.153673,0.071714
1,support,0.0005,0.045368,0.139703,0.068493
3,support,0.005,0.067102,0.083371,0.074357
41,zhangs_metric,0.95,0.023193,0.069401,0.034767
9,confidence,0.1,0.074092,0.06895,0.071429
17,lift,10.0,0.025833,0.066697,0.037242
22,leverage,0.0002,0.023856,0.066246,0.035079
42,zhangs_metric,1.0,0.027148,0.065795,0.038436


In [13]:
encoded_transactions = encode_baskets(list(train.values()))
freq_itemsets = apriori(encoded_transactions, min_support=0.001, use_colnames=True)
methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}
results_2 = play_with(freq_itemsets, methods_dict, test, test_labels, top_n=[1, 2, 3, 4, 5])

100%|██████████| 1461/1461 [00:31<00:00, 45.73it/s]  0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:32<00:00, 44.98it/s] 14%|█▍        | 1/7 [00:32<03:11, 31.98s/it]
100%|██████████| 1461/1461 [00:32<00:00, 45.26it/s] 29%|██▊       | 2/7 [01:04<02:41, 32.30s/it]
100%|██████████| 1461/1461 [00:02<00:00, 681.21it/s]%|████▎     | 3/7 [01:36<02:09, 32.33s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 3770.72it/s]|█████▋    | 4/7 [01:39<01:01, 20.44s/it]
100%|██████████| 1461/1461 [00:00<00:00, 183041.94it/s]█████▏  | 5/7 [01:39<00:26, 13.22s/it] 
100%|██████████| 1461/1461 [00:00<00:00, 252020.49it/s]█████▏  | 5/7 [01:39<00:26, 13.22s/it]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [01:39<00:00, 14.22s/it]
100%|██████████| 1461/1461 [00:29<00:00, 48.90it/s]9:   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:11<00:00, 130.93it/s]  14%|█▍        | 1/7 [00:29<02:59, 29.89s/it] 
100%|██████████| 1461/1461 [00:03<00:00, 483.17it/

In [14]:
display(results_2.sort_values(by='f1_score', ascending=False).head(10))
display(results_2.sort_values(by='precision', ascending=False).head(10))
display(results_2.sort_values(by='recall', ascending=False).head(10))

Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
180,confidence,0.05,5,0.065045,0.180712,0.095658
137,confidence,0.05,4,0.067554,0.153222,0.093767
94,confidence,0.05,3,0.073037,0.125732,0.092399
179,confidence,0.01,5,0.059333,0.166742,0.087522
51,confidence,0.05,2,0.080077,0.093285,0.086178
136,confidence,0.01,4,0.061991,0.140604,0.086045
93,confidence,0.01,3,0.067277,0.115818,0.085113
172,support,0.0001,5,0.056607,0.159081,0.083501
173,support,0.0005,5,0.056607,0.159081,0.083501
174,support,0.001,5,0.056607,0.159081,0.083501


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
4,support,0.01,1,0.097561,0.016224,0.027821
3,support,0.005,1,0.089163,0.029292,0.044098
9,confidence,0.1,1,0.086294,0.045967,0.059982
8,confidence,0.05,1,0.083589,0.049121,0.061879
52,confidence,0.1,2,0.081454,0.058585,0.068152
46,support,0.005,2,0.081122,0.04822,0.060486
51,confidence,0.05,2,0.080077,0.093285,0.086178
181,confidence,0.1,5,0.078963,0.059036,0.067561
138,confidence,0.1,4,0.078963,0.059036,0.067561
95,confidence,0.1,3,0.078963,0.059036,0.067561


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
180,confidence,0.05,5,0.065045,0.180712,0.095658
179,confidence,0.01,5,0.059333,0.166742,0.087522
172,support,0.0001,5,0.056607,0.159081,0.083501
173,support,0.0005,5,0.056607,0.159081,0.083501
174,support,0.001,5,0.056607,0.159081,0.083501
137,confidence,0.05,4,0.067554,0.153222,0.093767
136,confidence,0.01,4,0.061991,0.140604,0.086045
130,support,0.0005,4,0.058017,0.131591,0.08053
131,support,0.001,4,0.058017,0.131591,0.08053
129,support,0.0001,4,0.058017,0.131591,0.08053


In [15]:
encoded_transactions = encode_baskets(list(train.values()))
freq_itemsets = apriori(encoded_transactions, min_support=0.01, use_colnames=True)
methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}
results_3 = play_with(freq_itemsets, methods_dict, test, test_labels, top_n=[1, 2, 3, 4, 5])

100%|██████████| 1461/1461 [00:00<00:00, 4331.97it/s]|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4392.16it/s]|█▍        | 1/7 [00:00<00:02,  2.93it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4594.88it/s]|██▊       | 2/7 [00:00<00:01,  2.94it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4561.19it/s]|████▎     | 3/7 [00:01<00:01,  3.02it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4489.88it/s]|█████▋    | 4/7 [00:01<00:00,  3.04it/s]
100%|██████████| 1461/1461 [00:00<00:00, 252414.97it/s]█████▏  | 5/7 [00:01<00:00,  3.02it/s] 
100%|██████████| 1461/1461 [00:00<00:00, 249222.31it/s]█████▏  | 5/7 [00:01<00:00,  3.02it/s]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [00:01<00:00,  4.16it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4480.21it/s] 0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4284.28it/s]14%|█▍        | 1/7 [00:00<00:01,  3.04it/s]
100%|██████████| 1461/1461 [00:00<00:00, 9159.65it/s]9%|██▊   

In [16]:
display(results_3.sort_values(by='f1_score', ascending=False).head(10))
display(results_3.sort_values(by='precision', ascending=False).head(10))
display(results_3.sort_values(by='recall', ascending=False).head(10))

Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
173,support,0.0005,5,0.071429,0.030644,0.042889
143,lift,0.0,4,0.071429,0.030644,0.042889
199,conviction,0.5,5,0.071429,0.030644,0.042889
172,support,0.0001,5,0.071429,0.030644,0.042889
174,support,0.001,5,0.071429,0.030644,0.042889
175,support,0.005,5,0.071429,0.030644,0.042889
176,support,0.01,5,0.071429,0.030644,0.042889
144,lift,0.5,4,0.071429,0.030644,0.042889
179,confidence,0.01,5,0.071429,0.030644,0.042889
180,confidence,0.05,5,0.071429,0.030644,0.042889


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
9,confidence,0.1,1,0.110294,0.01352,0.024087
95,confidence,0.1,3,0.097701,0.015322,0.02649
181,confidence,0.1,5,0.097701,0.015322,0.02649
52,confidence,0.1,2,0.097701,0.015322,0.02649
138,confidence,0.1,4,0.097701,0.015322,0.02649
8,confidence,0.05,1,0.097561,0.016224,0.027821
1,support,0.0005,1,0.097561,0.016224,0.027821
0,support,0.0001,1,0.097561,0.016224,0.027821
7,confidence,0.01,1,0.097561,0.016224,0.027821
2,support,0.001,1,0.097561,0.016224,0.027821


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
173,support,0.0005,5,0.071429,0.030644,0.042889
143,lift,0.0,4,0.071429,0.030644,0.042889
199,conviction,0.5,5,0.071429,0.030644,0.042889
172,support,0.0001,5,0.071429,0.030644,0.042889
174,support,0.001,5,0.071429,0.030644,0.042889
175,support,0.005,5,0.071429,0.030644,0.042889
176,support,0.01,5,0.071429,0.030644,0.042889
144,lift,0.5,4,0.071429,0.030644,0.042889
179,confidence,0.01,5,0.071429,0.030644,0.042889
180,confidence,0.05,5,0.071429,0.030644,0.042889
