In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Load the events CSV; later cells read its event_type, user_id, product_id,
# brand and category_id columns. The path is relative to the notebook's working directory.
data = pd.read_csv('data/2020-Feb.csv')

In [3]:
def create_dense_matrix(data, column):
    """Build a one-hot user x item boolean table for frequent-itemset mining.

    Each user's distinct values of ``column`` are joined into one
    space-separated string, and the strings are encoded with a binary
    ``CountVectorizer``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``user_id`` column and ``column``.
    column : str
        Name of the column whose values become the items (output columns).

    Returns
    -------
    pandas.DataFrame
        Dense boolean frame with one row per ``user_id`` group and one column
        per vocabulary term. The ``user_id`` values are not kept as the index.

    Notes
    -----
    NOTE(review): tokenisation uses CountVectorizer's default settings, which
    presumably lowercase the text and split on non-word characters, so values
    containing spaces or punctuation may not survive as single items - confirm.
    """
    def _basket_text(items):
        # One whitespace-separated "document" per user, duplicates removed.
        return ' '.join(str(item) for item in items.unique())

    baskets = data.groupby('user_id')[column].apply(_basket_text).astype(str)

    encoder = CountVectorizer(binary=True, dtype=bool)
    encoded = encoder.fit_transform(baskets)
    item_names = encoder.get_feature_names_out()

    # apriori is fed a regular DataFrame, hence the densification here.
    return pd.DataFrame(encoded.toarray(), columns=item_names)

# Filter the purchase events once; every basket table below derives from this frame.
purchases = data[data['event_type'] == 'purchase']

# Product baskets: ids are cast to str so they can be joined into text documents.
# .assign builds a new frame instead of writing into a slice of `data`.
filtered_data = (
    purchases[['user_id', 'product_id']]
    .dropna()
    .assign(product_id=lambda frame: frame['product_id'].astype(str))
)
product_dense_df = create_dense_matrix(filtered_data, 'product_id')

# Brand baskets: dots are replaced with underscores
# (presumably so the vectorizer keeps dotted brand names as one token - confirm).
brand_basket_data = (
    purchases[['user_id', 'brand']]
    .dropna()
    .assign(brand=lambda frame: frame['brand'].str.replace('.', '_', regex=False))
)

# Category baskets need no value clean-up beyond dropping missing rows.
category_basket_data = purchases[['user_id', 'category_id']].dropna()

# NOTE: despite the "_sparse_" in their names, these are dense boolean frames.
brand_sparse_df = create_dense_matrix(brand_basket_data, 'brand')
category_sparse_df = create_dense_matrix(category_basket_data, 'category_id')


In [4]:
def generate_association_rules(data_df, min_support, min_confidence):
    """Mine frequent itemsets with Apriori and derive confidence-filtered rules.

    Parameters
    ----------
    data_df : pandas.DataFrame
        One-hot transaction table passed straight to ``apriori``.
    min_support : float
        Minimum support threshold forwarded to ``apriori``.
    min_confidence : float
        Minimum confidence forwarded to ``association_rules`` as ``min_threshold``.

    Returns
    -------
    The rules table produced by ``association_rules``; later cells read its
    ``antecedents`` and ``consequents`` columns.
    """
    # use_colnames=True makes the itemsets carry column labels rather than positions.
    itemsets = apriori(data_df, min_support=min_support, use_colnames=True)
    return association_rules(itemsets, metric="confidence", min_threshold=min_confidence)

In [5]:
# Product-level rules; same support/confidence thresholds as the brand and category runs.
product_association_rules = generate_association_rules(
    product_dense_df,
    min_support=0.002,
    min_confidence=0.01,
)

In [6]:
# Brand-level rules.
brand_association_rules = generate_association_rules(
    brand_sparse_df,
    min_support=0.002,
    min_confidence=0.01,
)

# Category-level rules, mined with the same thresholds.
category_association_rules = generate_association_rules(
    category_sparse_df,
    min_support=0.002,
    min_confidence=0.01,
)

In [7]:
# Flatten both itemset columns into comma-separated text before writing the CSV.
# This mutates product_association_rules in place, so the cell is meant to run once per kernel.
for itemset_column in ('antecedents', 'consequents'):
    product_association_rules[itemset_column] = (
        product_association_rules[itemset_column].apply(', '.join)
    )

product_association_rules.to_csv(
    'reguly_asocjacyjne_produkty_Feb.csv',
    index=False,
    float_format='%.3f',
)

print("Zapisano reguły do pliku 'reguly_asocjacyjne_produkty_Feb.csv'.")

Zapisano reguły do pliku 'reguly_asocjacyjne_produkty_Feb.csv'.


In [8]:
# Flatten both itemset columns into comma-separated text before writing the CSV.
# This mutates brand_association_rules in place, so the cell is meant to run once per kernel.
for itemset_column in ('antecedents', 'consequents'):
    brand_association_rules[itemset_column] = (
        brand_association_rules[itemset_column].apply(', '.join)
    )

brand_association_rules.to_csv(
    'reguly_asocjacyjne_marki_Feb.csv',
    index=False,
    float_format='%.3f',
)

print("Zapisano reguły do pliku 'reguly_asocjacyjne_marki_Feb.csv'.")

Zapisano reguły do pliku 'reguly_asocjacyjne_marki_Feb.csv'.


In [9]:
# Flatten both itemset columns of the *category* rules into comma-separated text.
# The flattened consequents must be stored on category_association_rules itself:
# writing them to brand_association_rules would leave the category CSV with raw
# itemset objects in 'consequents' and overwrite the brand rules held in memory.
# This mutates category_association_rules in place, so the cell is meant to run once per kernel.
for itemset_column in ('antecedents', 'consequents'):
    category_association_rules[itemset_column] = (
        category_association_rules[itemset_column].apply(', '.join)
    )

category_association_rules.to_csv(
    'reguly_asocjacyjne_kategorie_Feb.csv',
    index=False,
    float_format='%.3f',
)

print("Zapisano reguły do pliku 'reguly_asocjacyjne_kategorie_Feb.csv'.")

Zapisano reguły do pliku 'reguly_asocjacyjne_kategorie_Feb.csv'.
