In [4]:
from mlxtend.frequent_patterns import association_rules
import pandas as pd
import pickle
from ast import literal_eval
from tqdm import tqdm
import numpy as np

In [5]:
def recommend_items(input_items, rules, top_n=5, metric='confidence'):
    '''
    Recommend items for a basket from a table of association rules.

    A rule fires when its whole antecedent is contained in the basket. Every item of
    a firing rule's consequent that is not already in the basket becomes a candidate.
    Each candidate is scored with the mean of every rule metric over the rules that
    proposed it, and candidates are ranked by decreasing ``metric``. Ties are broken
    by decreasing confidence (by decreasing support when ranking on confidence).

    :param input_items: iterable of hashable items already in the basket
    :param rules: DataFrame of rules; the columns 'antecedents', 'consequents', 'support',
        'confidence', 'lift', 'leverage', 'conviction' and 'zhangs_metric' are read by name,
        so additional or reordered columns do not matter. Antecedents and consequents must
        be set-like (they are queried with ``issubset`` and iterated)
    :param top_n: maximum number of items to return
    :param metric: one of 'confidence', 'support', 'lift', 'leverage', 'conviction', 'zhangs_metric'

    :return: list of at most ``top_n`` recommended items, best first
    :raises ValueError: if ``metric`` is not one of the supported names
    '''
    # Position in this tuple == position of the metric in every per-item score tuple.
    metric_names = ('confidence', 'support', 'lift', 'leverage', 'conviction', 'zhangs_metric')
    if metric not in metric_names:
        raise ValueError('Invalid metric: {}'.format(metric))
    if len(rules) == 0:
        # Nothing can fire; also covers an empty frame that carries no columns at all.
        return []

    basket = set(input_items)  # O(1) membership tests inside the scan below
    totals = {}  # item -> running sum of each metric, in metric_names order
    counts = {}  # item -> number of rules that proposed the item

    # Select columns by name rather than by position: the number and order of columns
    # in the rules table is not stable, so positional unpacking of a row is fragile.
    columns = [rules[name] for name in ('antecedents', 'consequents') + metric_names]
    for antecedent, consequent, *values in zip(*columns):
        if not antecedent.issubset(basket):
            continue
        for item in consequent:
            if item in basket:
                continue
            if item in totals:
                totals[item] = [total + value for total, value in zip(totals[item], values)]
                counts[item] += 1
            else:
                totals[item] = list(values)
                counts[item] = 1

    # Average every metric over the rules that proposed the same item.
    averages = {
        item: tuple(total / counts[item] for total in sums)
        for item, sums in totals.items()
    }

    primary = metric_names.index(metric)
    # Confidence is the tie-breaker, except when it is already the primary key.
    secondary = metric_names.index('support') if metric == 'confidence' else metric_names.index('confidence')
    ranked = sorted(averages.items(), key=lambda entry: (-entry[1][primary], -entry[1][secondary]))

    return [item for item, _ in ranked[:top_n]]

def evaluate_recommendations(test_data, user_items, rules, top_n, metric='confidence'):
    '''
    Score rule-based recommendations with micro-averaged precision, recall and F1.

    :param test_data: dictionary mapping each user ID to the items that count as correct for that user
    :param user_items: dictionary mapping each user ID to the items the recommendations are generated from
    :param rules: association rules handed to recommend_items
    :param top_n: maximum number of recommendations requested per user
    :param metric: rule metric used to rank the recommendations

    :return: (precision, recall, f1_score); a ratio whose denominator is zero is reported as 0
    '''
    hits = 0      # recommended and expected
    spurious = 0  # recommended but not expected
    missed = 0    # expected but not recommended

    for user, true_items in tqdm(test_data.items()):
        suggested = set(recommend_items(user_items[user], rules, top_n=top_n, metric=metric))
        expected = set(true_items)
        hits += len(suggested & expected)
        spurious += len(suggested - expected)
        missed += len(expected - suggested)

    # Counts are pooled over all users before the ratios are taken.
    precision = 0
    if (hits + spurious) > 0:
        precision = hits / (hits + spurious)

    recall = 0
    if (hits + missed) > 0:
        recall = hits / (hits + missed)

    f1_score = 0
    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)

    return precision, recall, f1_score

def create_test_data(baskets, test_size=0.1):
    '''
    Randomly split baskets into a training part and a leave-one-out evaluation part.

    Each basket stays in the training data when a uniform random draw exceeds test_size.
    Every other basket with more than one item has exactly one randomly chosen item
    removed; the removed item becomes the label the model should predict.
    Held-out baskets with a single item (or none) appear in none of the outputs.

        :param baskets: iterable of baskets (collections of items); the position of a basket is used as its ID
        :param test_size: threshold for the random draw, i.e. the approximate share of baskets held out

    :return:
        train_data - dictionary mapping basket IDs to their unmodified items
        test_data - dictionary mapping held-out basket IDs to the set of items left in the basket
        test_labels - dictionary mapping held-out basket IDs to the set of items dropped from the basket
    '''
    # First pass: one random draw per basket decides whether it is kept for training.
    train_data = {}
    for basket_id, items in enumerate(baskets):
        if np.random.rand() > test_size:
            train_data[basket_id] = items

    # Second pass: hide one item from every held-out basket that can spare one.
    n_to_drop = 1
    test_data, test_labels = {}, {}
    for basket_id, items in enumerate(baskets):
        if basket_id in train_data or len(items) <= 1:
            continue
        candidates = list(items)
        hidden = np.random.choice(candidates, n_to_drop, replace=False)
        test_labels[basket_id] = set(hidden)
        test_data[basket_id] = set(candidates) - set(hidden)

    return train_data, test_data, test_labels

def play_with(frequent_itemsets, metrics, test_data, test_labels, top_n=5):
    '''
    Play with different metrics and thresholds for the creation of rules

    For every (metric, threshold) pair the rules are mined once and then evaluated for
    every requested recommendation-list length.

    :param frequent_itemsets: frequent itemsets passed to association_rules
    :param metrics: dictionary mapping each rule metric to the list of thresholds to try;
        the same metric is also used to rank the recommendations
    :param test_data: passed to evaluate_recommendations as its first argument
    :param test_labels: passed to evaluate_recommendations as its second argument
    :param top_n: a single list length, or a list/tuple/range/array of lengths to sweep

    :return: dataframe of method, threshold, precision, recall, f1_score - with an extra
        top_n column (after threshold) when several lengths were given. Rows are ordered
        by top_n first, then by metric and threshold in the order they were supplied.

    NOTE(review): evaluate_recommendations scores against its first argument and generates
    recommendations from its second, so here the rules are fed test_labels and scored
    against test_data. If test_labels holds the dropped items (as the naming in
    create_test_data suggests), the two arguments are swapped - confirm against how the
    pickled test set was built before changing the call.
    '''
    sweep = isinstance(top_n, (list, tuple, range, np.ndarray))
    cutoffs = list(top_n) if sweep else [top_n]

    rows = []
    if cutoffs:  # nothing to evaluate for an empty sweep, so skip the rule mining too
        for metric, threshold_list in metrics.items():
            loop = tqdm(threshold_list)
            for threshold in loop:
                # The rules do not depend on the list length, so build them once per threshold.
                rules = association_rules(frequent_itemsets, metric=metric, min_threshold=threshold)
                loop.set_description(f'Method: {metric}, Threshold: {threshold:.4f}, N_rules: {len(rules)}')
                for position, n in enumerate(cutoffs):
                    precision, recall, f1_score = evaluate_recommendations(test_data, test_labels, rules, top_n=n, metric=metric)
                    rows.append((position, metric, threshold, n, precision, recall, f1_score))

    # Stable sort on the position of top_n restores the top_n-major row order.
    rows.sort(key=lambda row: row[0])

    if sweep:
        return pd.DataFrame([row[1:] for row in rows], columns=['method', 'threshold', 'top_n', 'precision', 'recall', 'f1_score'])
    return pd.DataFrame([row[1:3] + row[4:] for row in rows], columns=['method', 'threshold', 'precision', 'recall', 'f1_score'])

In [6]:
# Use same as in task 2 for comparison purposes
# NOTE: pickle.load executes arbitrary code from the file - only load pickles you produced yourself.
# NOTE(review): presumably both pickles are dictionaries keyed by basket ID (they are later passed
# to play_with as test_data and test_labels) - confirm against the task 2 notebook.
with open('../assignment_02_task2/test_set.pkl', 'rb') as f:
    test = pickle.load(f)

with open('../assignment_02_task2/test_labels.pkl', 'rb') as f:
    test_labels = pickle.load(f)

In [11]:
# min_support ~ 0.0001
# Open the pickle in a context manager so the file handle is closed after loading.
# NOTE: pickle.load executes arbitrary code from the file - only load trusted pickles.
with open('decoded_out_2_10.pkl', 'rb') as itemsets_file:
    freq_itemsets = pickle.load(itemsets_file)

# Rule metric -> thresholds to sweep when generating the rules.
methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}

# Default top_n (5) is used here, so the result has no top_n column.
results = play_with(freq_itemsets, methods_dict, test, test_labels)

100%|██████████| 1461/1461 [15:48<00:00,  1.54it/s]   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [01:12<00:00, 20.02it/s] 14%|█▍        | 1/7 [15:48<1:34:52, 948.76s/it] 
100%|██████████| 1461/1461 [00:24<00:00, 60.20it/s] 29%|██▊       | 2/7 [17:01<36:08, 433.62s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 2461.98it/s]|████▎     | 3/7 [17:26<16:26, 246.71s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 270498.73it/s]███▋    | 4/7 [17:26<07:28, 149.56s/it] 
100%|██████████| 1461/1461 [00:00<00:00, 274067.63it/s]███▋    | 4/7 [17:26<07:28, 149.56s/it]
100%|██████████| 1461/1461 [00:00<00:00, 271157.05it/s]███▋    | 4/7 [17:26<07:28, 149.56s/it]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [17:26<00:00, 149.54s/it]
100%|██████████| 1461/1461 [15:21<00:00,  1.58it/s]11:   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [12:22<00:00,  1.97it/s]66:  14%|█▍        | 1/7 [15:22<1:32:12, 922.10s/it]
100%|██████████| 1461/1461 [10:17

In [12]:
# Top 10 configurations by F1 score, then by recall, then by precision.
display(*[
    results.sort_values(by=score_column, ascending=False).head(10)
    for score_column in ('f1_score', 'recall', 'precision')
])

Unnamed: 0,method,threshold,precision,recall,f1_score
2,support,0.001,0.050231,0.141956,0.074205
26,leverage,0.001,0.049952,0.140153,0.073653
1,support,0.0005,0.043238,0.133844,0.065361
25,leverage,0.0005,0.043,0.132041,0.064873
13,confidence,1.0,0.047894,0.093285,0.063293
24,leverage,0.0004,0.039983,0.125732,0.060672
34,conviction,5.0,0.043769,0.097341,0.060386
12,confidence,0.75,0.043136,0.098693,0.060033
23,leverage,0.0003,0.035207,0.111762,0.053546
33,conviction,2.0,0.036048,0.088779,0.051275


Unnamed: 0,method,threshold,precision,recall,f1_score
2,support,0.001,0.050231,0.141956,0.074205
26,leverage,0.001,0.049952,0.140153,0.073653
1,support,0.0005,0.043238,0.133844,0.065361
25,leverage,0.0005,0.043,0.132041,0.064873
24,leverage,0.0004,0.039983,0.125732,0.060672
23,leverage,0.0003,0.035207,0.111762,0.053546
12,confidence,0.75,0.043136,0.098693,0.060033
34,conviction,5.0,0.043769,0.097341,0.060386
13,confidence,1.0,0.047894,0.093285,0.063293
29,conviction,1.1,0.031652,0.091032,0.046971


Unnamed: 0,method,threshold,precision,recall,f1_score
3,support,0.005,0.051442,0.029743,0.037693
2,support,0.001,0.050231,0.141956,0.074205
26,leverage,0.001,0.049952,0.140153,0.073653
13,confidence,1.0,0.047894,0.093285,0.063293
34,conviction,5.0,0.043769,0.097341,0.060386
1,support,0.0005,0.043238,0.133844,0.065361
12,confidence,0.75,0.043136,0.098693,0.060033
25,leverage,0.0005,0.043,0.132041,0.064873
24,leverage,0.0004,0.039983,0.125732,0.060672
33,conviction,2.0,0.036048,0.088779,0.051275


In [7]:
# min_support ~ 0.001
# Open the pickle in a context manager so the file handle is closed after loading.
# NOTE: pickle.load executes arbitrary code from the file - only load trusted pickles.
with open('decoded_out_14_10.pkl', 'rb') as itemsets_file:
    freq_itemsets = pickle.load(itemsets_file)

# Rule metric -> thresholds to sweep when generating the rules.
methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}

# Sweep the recommendation-list length as well as the rule thresholds.
results2 = play_with(freq_itemsets, methods_dict, test, test_labels, top_n=[1, 2, 3, 4, 5])

100%|██████████| 1461/1461 [00:28<00:00, 51.10it/s]  0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:28<00:00, 51.82it/s] 14%|█▍        | 1/7 [00:28<02:51, 28.61s/it]
100%|██████████| 1461/1461 [00:28<00:00, 52.08it/s] 29%|██▊       | 2/7 [00:56<02:21, 28.37s/it]
100%|██████████| 1461/1461 [00:01<00:00, 779.52it/s]%|████▎     | 3/7 [01:24<01:52, 28.23s/it]  
100%|██████████| 1461/1461 [00:00<00:00, 4931.56it/s]|█████▋    | 4/7 [01:26<00:53, 17.82s/it]
100%|██████████| 1461/1461 [00:00<00:00, 270295.89it/s]█████▏  | 5/7 [01:27<00:23, 11.51s/it] 
100%|██████████| 1461/1461 [00:00<00:00, 267581.25it/s]█████▏  | 5/7 [01:27<00:23, 11.51s/it]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [01:27<00:00, 12.44s/it]
100%|██████████| 1461/1461 [00:28<00:00, 51.82it/s]6:   0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:22<00:00, 65.84it/s]9:  14%|█▍        | 1/7 [00:28<02:49, 28.20s/it]
100%|██████████| 1461/1461 [00:16<00:00, 87.90it/s

In [8]:
# Top 10 configurations by F1 score, then by recall, then by precision.
display(*[
    results2.sort_values(by=score_column, ascending=False).head(10)
    for score_column in ('f1_score', 'recall', 'precision')
])

Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
200,conviction,1.0,5,0.060118,0.169896,0.08881
199,conviction,0.5,5,0.060118,0.169896,0.08881
193,leverage,0.0001,5,0.058523,0.16539,0.086455
194,leverage,0.0002,5,0.058523,0.16539,0.086455
195,leverage,0.0003,5,0.058523,0.16539,0.086455
196,leverage,0.0004,5,0.058523,0.16539,0.086455
197,leverage,0.0005,5,0.058523,0.16539,0.086455
198,leverage,0.001,5,0.058143,0.163137,0.085731
174,support,0.001,5,0.057407,0.162235,0.084806
173,support,0.0005,5,0.057407,0.162235,0.084806


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
199,conviction,0.5,5,0.060118,0.169896,0.08881
200,conviction,1.0,5,0.060118,0.169896,0.08881
193,leverage,0.0001,5,0.058523,0.16539,0.086455
194,leverage,0.0002,5,0.058523,0.16539,0.086455
195,leverage,0.0003,5,0.058523,0.16539,0.086455
196,leverage,0.0004,5,0.058523,0.16539,0.086455
197,leverage,0.0005,5,0.058523,0.16539,0.086455
198,leverage,0.001,5,0.058143,0.163137,0.085731
179,confidence,0.01,5,0.057407,0.162235,0.084806
174,support,0.001,5,0.057407,0.162235,0.084806


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
13,confidence,1.0,1,0.092562,0.025237,0.03966
34,conviction,5.0,1,0.089256,0.024335,0.038244
12,confidence,0.75,1,0.089256,0.024335,0.038244
4,support,0.01,1,0.085995,0.015773,0.026657
33,conviction,2.0,1,0.08595,0.023434,0.036827
11,confidence,0.5,1,0.08595,0.023434,0.036827
32,conviction,1.5,1,0.085526,0.023434,0.036788
3,support,0.005,1,0.082811,0.029743,0.043767
56,confidence,1.0,2,0.082645,0.045065,0.058326
55,confidence,0.75,2,0.081818,0.044615,0.057743


In [9]:
# min_support ~ 0.01
# Open the pickle in a context manager so the file handle is closed after loading.
# NOTE: pickle.load executes arbitrary code from the file - only load trusted pickles.
with open('decoded_out_135_10.pkl', 'rb') as itemsets_file:
    freq_itemsets = pickle.load(itemsets_file)

# Rule metric -> thresholds to sweep when generating the rules.
methods_dict = {
    'support': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1],
    'confidence':[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1],
    'lift':[0, 0.5, 5, 10, 20, 50, 100],
    'leverage':[0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.001],
    'conviction':[0.5, 1, 1.1, 1.25, 1.4, 1.5, 2, 5],
    'zhangs_metric':[0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 1],
}

# Sweep the recommendation-list length as well as the rule thresholds.
results3 = play_with(freq_itemsets, methods_dict, test, test_labels, top_n=[1, 2, 3, 4, 5])

100%|██████████| 1461/1461 [00:00<00:00, 4217.80it/s]|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4313.14it/s]|█▍        | 1/7 [00:00<00:02,  2.86it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4300.22it/s]|██▊       | 2/7 [00:00<00:01,  2.90it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4328.83it/s]|████▎     | 3/7 [00:01<00:01,  2.91it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4279.21it/s]|█████▋    | 4/7 [00:01<00:01,  2.92it/s]
100%|██████████| 1461/1461 [00:00<00:00, 273090.52it/s]█████▏  | 5/7 [00:01<00:00,  2.92it/s] 
100%|██████████| 1461/1461 [00:00<00:00, 272932.40it/s]█████▏  | 5/7 [00:01<00:00,  2.92it/s]
Method: support, Threshold: 0.1000, N_rules: 0: 100%|██████████| 7/7 [00:01<00:00,  4.04it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4290.76it/s] 0%|          | 0/7 [00:00<?, ?it/s]
100%|██████████| 1461/1461 [00:00<00:00, 4305.20it/s]14%|█▍        | 1/7 [00:00<00:02,  2.92it/s]
100%|██████████| 1461/1461 [00:00<00:00, 7907.08it/s]9%|██▊   

In [10]:
# Top 10 configurations by F1 score, then by recall, then by precision.
display(*[
    results3.sort_values(by=score_column, ascending=False).head(10)
    for score_column in ('f1_score', 'recall', 'precision')
])

Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
173,support,0.0005,5,0.067157,0.032898,0.044162
187,lift,0.5,5,0.067157,0.032898,0.044162
199,conviction,0.5,5,0.067157,0.032898,0.044162
172,support,0.0001,5,0.067157,0.032898,0.044162
174,support,0.001,5,0.067157,0.032898,0.044162
175,support,0.005,5,0.067157,0.032898,0.044162
176,support,0.01,5,0.067157,0.032898,0.044162
179,confidence,0.01,5,0.067157,0.032898,0.044162
186,lift,0.0,5,0.067157,0.032898,0.044162
180,confidence,0.05,5,0.067157,0.032898,0.044162


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
173,support,0.0005,5,0.067157,0.032898,0.044162
187,lift,0.5,5,0.067157,0.032898,0.044162
172,support,0.0001,5,0.067157,0.032898,0.044162
174,support,0.001,5,0.067157,0.032898,0.044162
175,support,0.005,5,0.067157,0.032898,0.044162
176,support,0.01,5,0.067157,0.032898,0.044162
179,confidence,0.01,5,0.067157,0.032898,0.044162
186,lift,0.0,5,0.067157,0.032898,0.044162
180,confidence,0.05,5,0.067157,0.032898,0.044162
199,conviction,0.5,5,0.067157,0.032898,0.044162


Unnamed: 0,method,threshold,top_n,precision,recall,f1_score
9,confidence,0.1,1,0.103226,0.014421,0.025306
4,support,0.01,1,0.093366,0.017125,0.028941
8,confidence,0.05,1,0.093366,0.017125,0.028941
7,confidence,0.01,1,0.093366,0.017125,0.028941
0,support,0.0001,1,0.093366,0.017125,0.028941
3,support,0.005,1,0.093366,0.017125,0.028941
2,support,0.001,1,0.093366,0.017125,0.028941
1,support,0.0005,1,0.093366,0.017125,0.028941
95,confidence,0.1,3,0.08913,0.018477,0.030608
52,confidence,0.1,2,0.08913,0.018477,0.030608
