# Imports

In [44]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth, association_rules

# Load data

In [73]:
# Single place to point the notebook at the Olist CSV dump; every table below
# is read from this directory.
DATA_DIR = "./data"

# Raw tables, one DataFrame per CSV file. No parsing options are passed, so
# columns keep whatever dtypes pandas infers.
customers = pd.read_csv(f"{DATA_DIR}/olist_customers_dataset.csv")
sellers = pd.read_csv(f"{DATA_DIR}/olist_sellers_dataset.csv")
reviews = pd.read_csv(f"{DATA_DIR}/olist_order_reviews_dataset.csv")
items = pd.read_csv(f"{DATA_DIR}/olist_order_items_dataset.csv")
products = pd.read_csv(f"{DATA_DIR}/olist_products_dataset.csv")
geolocation = pd.read_csv(f"{DATA_DIR}/olist_geolocation_dataset.csv")
category_name_translation = pd.read_csv(f"{DATA_DIR}/product_category_name_translation.csv")
orders = pd.read_csv(f"{DATA_DIR}/olist_orders_dataset.csv")
order_payments = pd.read_csv(f"{DATA_DIR}/olist_order_payments_dataset.csv")

In [74]:
# Every raw table gathered into one list, in the order listed here.
datasets = [
    customers,
    sellers,
    reviews,
    items,
    products,
    geolocation,
    category_name_translation,
    orders,
    order_payments,
]

In [4]:
# Wide frame: orders joined to their line items, then to the product, seller
# and customer tables. Each step uses pandas' default join type on the named key.
df = (
    orders
    .merge(items, on="order_id")
    .merge(products, on="product_id")
    .merge(sellers, on="seller_id")
    .merge(customers, on="customer_id")
)

# Build per-customer lists of product categories

In [78]:
# Products joined to the order lines that contain them, then to the order
# header (which carries customer_id). Default join type on each key.
df2 = (
    products
    .merge(items, on="product_id")
    .merge(orders, on="order_id")
)

In [79]:
# Show the column labels of the merged product/item/order frame.
df2.columns

Index(['product_id', 'product_category_name', 'product_name_lenght',
       'product_description_lenght', 'product_photos_qty', 'product_weight_g',
       'product_length_cm', 'product_height_cm', 'product_width_cm',
       'order_id', 'order_item_id', 'seller_id', 'shipping_limit_date',
       'price', 'freight_value', 'customer_id', 'order_status',
       'order_purchase_timestamp', 'order_approved_at',
       'order_delivered_carrier_date', 'order_delivered_customer_date',
       'order_estimated_delivery_date'],
      dtype='object')

In [80]:
# Build one "basket" per customer_id: the list of product category names
# appearing on that customer's order lines (duplicates are kept).
#
# The per-group frame is deliberately NOT named `items`: that name is the
# order-items DataFrame loaded earlier, and rebinding it here would make the
# merge cells that read `items` fail or silently use one customer's rows when
# they are re-run.
#
# astype(str) renders a missing category as the string 'nan' instead of
# dropping it, so such rows still contribute an item to the basket.
# TODO(review): decide whether uncategorised products should be excluded.
x = [
    list(customer_rows["product_category_name"].astype(str))
    for _, customer_rows in df2.groupby("customer_id")
]

In [81]:
# Preview the first ten baskets.
x[:10]

[['brinquedos'],
 ['beleza_saude'],
 ['bebes'],
 ['cool_stuff'],
 ['cama_mesa_banho'],
 ['esporte_lazer'],
 ['fashion_bolsas_e_acessorios'],
 ['brinquedos'],
 ['fashion_bolsas_e_acessorios'],
 ['pet_shop']]

# Mine frequent itemsets and association rules

In [82]:
# One-hot encode the baskets: one row per basket, one column per distinct
# category label seen while fitting.
te = TransactionEncoder()
te.fit(x)
te_ary = te.transform(x)

# NOTE: this rebinds `df`, which an earlier cell used for the merged orders frame.
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine frequent itemsets with FP-Growth; use_colnames=True reports itemsets by
# category name rather than column index.
frequent_itemsets = fpgrowth(
    df,
    min_support=0.00005,
    use_colnames=True,
)
frequent_itemsets.head(5)

Unnamed: 0,support,itemsets
0,0.039385,(brinquedos)
1,0.089555,(beleza_saude)
2,0.02924,(bebes)
3,0.036811,(cool_stuff)
4,0.095443,(cama_mesa_banho)


In [83]:

# Derive association rules from the mined itemsets, filtering on the
# confidence metric with a minimum threshold of 0.01.
association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.01,
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(moveis_decoracao),(cama_mesa_banho),0.065362,0.095443,0.000709,0.010854,0.113726,-0.005529,0.914483
1,(casa_construcao),(moveis_decoracao),0.004966,0.065362,0.000132,0.026531,0.405903,-0.000193,0.96011
2,(casa_construcao),(ferramentas_jardim),0.004966,0.035656,7.1e-05,0.014286,0.400658,-0.000106,0.97832
3,(construcao_ferramentas_iluminacao),(moveis_decoracao),0.002473,0.065362,0.000111,0.045082,0.689728,-5e-05,0.978763
4,(audio),(relogios_presentes),0.003547,0.057,6.1e-05,0.017143,0.30075,-0.000141,0.959447
5,(moveis_sala),(moveis_decoracao),0.004277,0.065362,7.1e-05,0.016588,0.253782,-0.000209,0.950403
6,(casa_conforto),(cama_mesa_banho),0.004024,0.095443,0.000436,0.108312,1.134835,5.2e-05,1.014432
7,(casa_conforto),(moveis_decoracao),0.004024,0.065362,6.1e-05,0.015113,0.231226,-0.000202,0.94898
8,(artes),(moveis_decoracao),0.002047,0.065362,5.1e-05,0.024752,0.378699,-8.3e-05,0.95836
