In [37]:
import csv
from itertools import combinations

Prétraitement des données

In [38]:
def load_dataset_from_csv(file_path):
    dataset = []
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            dataset.append(row)
    return dataset

In [39]:
# Load dataset from CSV file
file_path = 'replace-it-with-file_path'
dataset = load_dataset_from_csv(file_path)

In [40]:

print("First 5 transactions:")
for transaction in dataset[:5]:
    print(transaction)

First 5 transactions:
['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil']
['burgers', 'meatballs', 'eggs']
['chutney']
['turkey', 'avocado']
['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea']


Implémentation de l'algorithme Apriori

In [41]:
def create_candidate_itemset(dataset):
    candidate_itemset = set()
    for transaction in dataset:
        for item in transaction:
            candidate_itemset.add(frozenset([item]))
    return candidate_itemset

In [42]:
def filter_candidates(dataset, candidate_itemset, min_support):
    support_counts = {}
    for transaction in dataset:
        for candidate in candidate_itemset:
            if candidate.issubset(transaction):
                support_counts[candidate] = support_counts.get(candidate, 0) + 1

    num_transactions = len(dataset)
    frequent_itemset = set()
    support_data = {}
    for itemset, support in support_counts.items():
        support_value = support / num_transactions
        if support_value >= min_support:
            frequent_itemset.add(itemset)
            support_data[itemset] = support_value
    return frequent_itemset, support_data

In [43]:
def generate_candidates(prev_frequent_itemset, k):
    candidates = set()
    for itemset1 in prev_frequent_itemset:
        for itemset2 in prev_frequent_itemset:
            if len(itemset1.union(itemset2)) == k:
                candidate = itemset1.union(itemset2)
                if all([frozenset(x) in prev_frequent_itemset for x in combinations(candidate, k-1)]):
                    candidates.add(candidate)
    return candidates

Extraction des règles d'association

In [None]:
min_support = 0.0045
min_confidence = 0.2
min_lift = 3
min_length = 2 

In [45]:
def generate_rules(frequent_itemsets, support_data, min_confidence, min_lift, min_length):
    rules = set()  # Utilisation d'un ensemble pour éviter les doublons
    for itemset in frequent_itemsets:
        if len(itemset) >= min_length:
            for item in itemset:
                antecedent = itemset - frozenset([item])
                consequent = frozenset([item])
                confidence = support_data[itemset] / support_data[antecedent]
                lift = confidence / support_data[consequent]
                if confidence >= min_confidence and lift >= min_lift:
                    # Utilisation d'un tuple pour stocker la règle
                    rule = (antecedent, consequent, support_data[itemset], confidence, lift)
                    rules.add(rule)  # Ajout de la règle à l'ensemble
    return list(rules)  # Conversion de l'ensemble en liste avant de la retourner


In [46]:
print(len(rules))

28


In [47]:
print(rules[0])

(frozenset({'spaghetti', 'frozen vegetables'}), frozenset({'olive oil'}), 0.005732568990801226, 0.20574162679425836, 3.1240241752707125)


application de l'algorithme


In [None]:
def apriori(dataset, min_support, min_confidence, min_lift, min_length):
    frequent_itemsets = []
    support_data = {}
    candidate_itemset = create_candidate_itemset(dataset)
    k = 1
    while True:
        frequent_itemset, support_values = filter_candidates(dataset, candidate_itemset, min_support)
        if len(frequent_itemset) == 0:
            break
        frequent_itemsets.append(frequent_itemset)
        support_data.update(support_values)
        candidate_itemset = generate_candidates(frequent_itemset, k + 1)
        k += 1
    rules = generate_rules(set.union(*frequent_itemsets), support_data, min_confidence, min_lift, min_length)
    return rules

In [49]:

rules = apriori(dataset, min_support, min_confidence, min_lift, min_length)

Visualisation des résultats

In [50]:
# Print the generated rules
for rule in rules:
    antecedent = ', '.join(rule[0])
    consequent = ', '.join(rule[1])
    print("Rule: " + antecedent + " -> " + consequent)
    print("Support: " + str(rule[2]))
    print("Confidence: " + str(rule[3]))
    print("Lift: " + str(rule[4]))
    print("=====================================")

Rule: spaghetti, frozen vegetables -> olive oil
Support: 0.005732568990801226
Confidence: 0.20574162679425836
Lift: 3.1240241752707125
Rule: pasta -> shrimp
Support: 0.005065991201173177
Confidence: 0.3220338983050847
Lift: 4.506672147735896
Rule: cooking oil, spaghetti -> ground beef
Support: 0.004799360085321957
Confidence: 0.3025210084033613
Lift: 3.0789824749438446
Rule: cooking oil, ground beef -> spaghetti
Support: 0.004799360085321957
Confidence: 0.5714285714285714
Lift: 3.2819951870487856
Rule: mushroom cream sauce -> escalope
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049
Rule: mineral water, spaghetti, milk -> frozen vegetables
Support: 0.004532728969470737
Confidence: 0.28813559322033894
Lift: 3.0228043143297376
Rule: spaghetti, frozen vegetables -> shrimp
Support: 0.005999200106652446
Confidence: 0.21531100478468898
Lift: 3.0131489680782684
Rule: mineral water, soup -> olive oil
Support: 0.005199306759098787
Confidence: 0.22543352601156