In [95]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Toy market-basket data: each inner list holds the items of one transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Echo the raw transactions before they are encoded.
print(dataset)

# Fit the encoder on the transactions, then encode them into an array with
# one column per item learned during fitting (names in `te.columns_`).
te = TransactionEncoder().fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine itemsets whose support is at least 0.6; `use_colnames=True` reports
# item names instead of column indices.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

# Derive association rules from those itemsets, keeping rules whose
# confidence is at least 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

print(rules)

[['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Milk', 'Apple', 'Kidney Beans', 'Eggs'], ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'], ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]
              antecedents            consequents  antecedent support  \
0                  (Eggs)         (Kidney Beans)                 0.8   
1          (Kidney Beans)                 (Eggs)                 1.0   
2                  (Eggs)                (Onion)                 0.8   
3                 (Onion)                 (Eggs)                 0.6   
4                  (Milk)         (Kidney Beans)                 0.6   
5                 (Onion)         (Kidney Beans)                 0.6   
6                (Yogurt)         (Kidney Beans)                 0.6   
7    (Eggs, Kidney Beans)                (Onion)                 0.8   
8           (Eggs, Onion)         (Kidney Beans)              

In [97]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

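# 1. Support: It is the fraction of all transactions that contain the itemset.

# support(A⇒ B) = P(A ∪ B), i.e. the probability that a transaction contains every item in A and in B.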

# Support (Books) = Freq (Books)/Total transactions made

# Support (Books) = 6/100 = 0.06 (i.e. 6%)

# 2. Confidence: It is the ratio of combined transactions to individual transactions.

# confidence(A⇒ B) =P(B|A)

# Confidence (Books) = Combined transactions/Individual transaction

# Confidence (Books) = 0.06/0.08 = 0.75

# 3. Lift: It is the ratio of the confidence of the rule to the support of the consequent.

# Lift = 0.75/0.10 = 7.5

# If the value of lift < 1, the combination is bought together less often than expected if the items were independent.
# If the value of lift > 1, the combination is bought together more often than expected if the items were independent.
# If the value of lift = 1, then the purchase of antecedent makes no difference on the consequent.

# Load the raw sales lines. The steps below rely on the CSV providing
# 'InvoiceID' and 'Barcode' columns.
df1 = pd.read_csv('data.csv')
# Print df1, the frame that was just loaded; `df` is the encoded matrix
# from the earlier toy example and is unrelated to this data.
print(df1)

# Keep only the two columns needed to build baskets.
dataset = df1[['InvoiceID', 'Barcode']]
# Drop invoices that have a single row: they cannot contribute to any
# association rule. `.copy()` detaches the filtered result from df1 so the
# column assignment below writes to an owned frame instead of a slice
# (this avoids pandas' SettingWithCopyWarning).
dataset = dataset[dataset.groupby('InvoiceID')['Barcode'].transform('size') > 1].copy()
print("Dataset after remove transaction only one item: \n", dataset)

# Barcodes must be strings before they can be joined into one text field.
dataset['Barcode'] = dataset['Barcode'].astype(str)
# Collapse each invoice into a single row whose 'Barcode' value is the
# comma-separated list of all barcodes on that invoice.
dataset_str = dataset.groupby('InvoiceID')['Barcode'].agg(', '.join).reset_index()
print("Dataset group Barcode to one transaction: \n", dataset_str)

# Persist the grouped transactions; with pandas' defaults the row index is
# written as the first (unnamed) column of the file.
dataset_str.to_csv("output.csv")

Dataset after remove transaction only one item: 
                  InvoiceID        Barcode
1       VN0001010102230002  2010902000198
2       VN0001010102230002  8935049510864
3       VN0001010102230002  2602010136625
4       VN0001010102230003  2010809000253
5       VN0001010102230003  2701020000046
...                    ...            ...
151930  VN9996020102230158  8858223013039
151931  VN9996020102230158  4897036691342
151932  VN9996020102230159  8934588063053
151933  VN9996020102230159  2270102000033
151934  VN9996020102230159  2703010000043

[119428 rows x 2 columns]
Dataset group Barcode to one transaction: 
                 InvoiceID                                      Barcode
0      VN0001010102230002  2010902000198, 8935049510864, 2602010136625
1      VN0001010102230003                 2010809000253, 2701020000046
2      VN0001010102230005                 8935001282266, 8936034875357
3      VN0001010102230006                  769828221591, 2010805000363
4      VN00010101022