## Carregando o drive 



In [2]:
from google.colab import drive

# Mount Google Drive at this location inside the Colab runtime.
inicial_path = '/content/drive'
drive.mount(inicial_path)

# Full path of the Online Retail spreadsheet under the mounted drive.
stringPath = f'{inicial_path}/My Drive/Inteligência Artificial/UA 12/aula 23/Python/Online Retail.xlsx'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Carregando as libs

In [4]:
import numpy as np 
import pandas as pd 

#http://rasbt.github.io/mlxtend/
#Mlxtend (extensões de aprendizado de máquina) é uma biblioteca Python de ferramentas úteis para as tarefas de ciência de dados do dia-a-dia.
from mlxtend.frequent_patterns import apriori, association_rules 


## Etapa 2: carregar e explorar os dados

In [None]:
# Read the spreadsheet into a DataFrame and preview its first rows.
data = pd.read_excel(io=stringPath)
data.head(n=5)

### Explorando as colunas dos dados


In [6]:
data.columns

# Inspect the distinct regions (countries) in which transactions occurred.
data['Country'].unique()

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

## Etapa 3: limpar os dados

In [7]:
# Clean the raw transactions without mutating the loaded frame in place, so
# the cell can be re-run safely:
#   1. drop the rows that have no invoice number,
#   2. strip surrounding whitespace from the item descriptions,
#   3. cast invoice numbers to strings so they can be searched as text.
data = (
    data
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(
        Description=lambda df: df['Description'].str.strip(),
        InvoiceNo=lambda df: df['InvoiceNo'].astype('str'),
    )
)

# Discard every transaction whose invoice number contains 'C'
# (the transactions made on credit).
data = data[~data['InvoiceNo'].str.contains('C')]

## Etapa 4: dividir os dados de acordo com a região da transação

In [9]:
def _build_basket(transactions, country):
    """Build the invoice-by-product quantity matrix for one country.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Transaction lines; the 'Country', 'InvoiceNo', 'Description' and
        'Quantity' columns are read.
    country : str
        Value of the 'Country' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per 'InvoiceNo' (used as the index), one column per
        'Description', holding the summed 'Quantity'. Invoice/product
        combinations that never occur are filled with 0.
    """
    return (transactions[transactions['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum().unstack().reset_index().fillna(0)
            .set_index('InvoiceNo'))


# Transactions made in France
basket_France = _build_basket(data, "France")

# Transactions made in the United Kingdom
basket_UK = _build_basket(data, "United Kingdom")

# Transactions made in Portugal
basket_Por = _build_basket(data, "Portugal")

# Transactions made in Sweden
basket_Sweden = _build_basket(data, "Sweden")

## Etapa 5: codificação one-hot dos dados


In [10]:
# Defining the hot encoding function to make the data suitable
# for the concerned libraries
def hot_encode(x):
    """Convert a basket quantity into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Quantity stored in one cell of a basket matrix.

    Returns
    -------
    int
        1 when ``x >= 1``; 0 for every other value.

    Notes
    -----
    Every input yields 0 or 1. Values strictly between 0 and 1, and NaN,
    satisfy neither ``x <= 0`` nor ``x >= 1``; they are mapped to 0 here
    rather than falling through and implicitly returning ``None``.
    """
    return 1 if x >= 1 else 0
  
# Encode every basket matrix cell by cell with hot_encode, replacing each
# quantity matrix by its encoded counterpart.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; it is kept here so the cell also runs on older pandas.
basket_France = basket_France.applymap(hot_encode)
basket_UK = basket_UK.applymap(hot_encode)
basket_Por = basket_Por.applymap(hot_encode)

# basket_encoded is left bound to the last encoded frame (Sweden).
basket_Sweden = basket_encoded = basket_Sweden.applymap(hot_encode)

## Etapa 6: construir os modelos e analisar os resultados

## France

In [11]:
# Build the model: frequent itemsets of the French baskets (min_support = 0.05).
frq_items = apriori(basket_France, use_colnames=True, min_support=0.05)

# Collect the inferred rules (lift metric, threshold 1) in a dataframe,
# ordered by descending confidence and then descending lift.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                           antecedents  ... conviction
44                        (JUMBO BAG WOODLAND ANIMALS)  ...        inf
258  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...  ...        inf
272  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...  ...        inf
300  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...  ...  34.897959
301  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...  ...  34.489796

[5 rows x 9 columns]


## United Kingdom:

In [12]:
# Frequent itemsets of the United Kingdom baskets (min_support = 0.01).
frq_items = apriori(basket_UK, use_colnames=True, min_support=0.01)

# Association rules (lift metric, threshold 1), ordered by descending
# confidence and then descending lift.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                       antecedents  ... conviction
117           (BEADED CRYSTAL HEART PINK ON STICK)  ...  39.637371
2018  (SUKI  SHOULDER BAG, JAM MAKING SET PRINTED)  ...  26.096206
2295         (HERB MARKER MINT, HERB MARKER THYME)  ...  21.947227
2301   (HERB MARKER ROSEMARY, HERB MARKER PARSLEY)  ...  20.444951
2302      (HERB MARKER THYME, HERB MARKER PARSLEY)  ...  20.443842

[5 rows x 9 columns]


## Portugal

In [13]:
# Frequent itemsets of the Portuguese baskets (min_support = 0.05).
frq_items = apriori(basket_Por, use_colnames=True, min_support=0.05)

# Association rules (lift metric, threshold 1), ordered by descending
# confidence and then descending lift.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                              antecedents  ... conviction
1170   (SET 12 COLOUR PENCILS DOLLY GIRL)  ...        inf
1171     (SET 12 COLOUR PENCILS SPACEBOY)  ...        inf
1172   (SET 12 COLOUR PENCILS DOLLY GIRL)  ...        inf
1173   (SET OF 4 KNICK KNACK TINS LONDON)  ...        inf
1174  (SET OF 4 KNICK KNACK TINS POPPIES)  ...        inf

[5 rows x 9 columns]


## Sweden

In [14]:
# Frequent itemsets of the Swedish baskets (min_support = 0.05).
frq_items = apriori(basket_Sweden, use_colnames=True, min_support=0.05)

# Association rules (lift metric, threshold 1), ordered by descending
# confidence and then descending lift.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                           antecedents  ... conviction
0        (PACK OF 72 SKULL CAKE CASES)  ...        inf
1        (12 PENCILS SMALL TUBE SKULL)  ...        inf
4       (ASSORTED BOTTLE TOP  MAGNETS)  ...        inf
5              (36 DOILIES DOLLY GIRL)  ...        inf
180  (CHILDRENS CUTLERY CIRCUS PARADE)  ...        inf

[5 rows x 9 columns]
