In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Apriori Algorithm

**Apriori is an algorithm for frequent item set mining and association rule learning over relational databases. It proceeds by identifying the frequent individual items in the database and extending them to larger and larger item sets as long as those item sets appear sufficiently often in the database.**

**Why Apriori**

Apriori helps us understand the buying patterns of customers.
Example: customers buy many goods together at a grocery store. By applying this algorithm to its transactions, the store can discover which items tend to be bought together and use that knowledge to improve its sales performance.

**Antecedent and Consequent**

The IF component of an association rule is known as the antecedent. The THEN component is known as the consequent.

E.g., IF milk THEN bread, milk -> bread

**Support, Lift, Confidence, Antecedent and Consequent**

**support(X) = number of transactions containing X / total number of transactions.**

**confidence(X -> Y) = support(X Union Y) / support(X).**

**lift(X -> Y) = support(X Union Y) / (support(X) * support(Y)).**

The LHS of a rule is the antecedent and the RHS is the consequent. Note: the rules are derived from the data.

For example, for a rule milk -> cheese:

Support says that 67% of transactions contain both milk and cheese. Confidence says that 100% of the customers who bought milk also bought cheese; this is the conditional probability of cheese given milk. A lift of 1.28 represents a 28% increase in the expectation that someone will buy cheese when we know that they bought milk, compared with how often cheese is bought overall.



In [None]:
# Not required: this notebook uses mlxtend's apriori, not apyori. To install apyori anyway, run: %pip install apyori

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load the Online Retail II transactions from the Kaggle input directory.
df = pd.read_csv('../input/online-retail-ii-uci/online_retail_II.csv')
# Placeholder list; nothing in the visible notebook reads it — TODO confirm and remove.
transactions = []

In [None]:
# Preview the first rows to see which columns the dataset provides.
df.head()

In [None]:
# (rows, columns) of the loaded data, before any cleaning is applied.
df.shape

**Checking for null values, and stripping them off**

In [None]:
# True if at least one cell anywhere in the frame is missing.
df.isna().to_numpy().any()

In [None]:
# Clean the frame in one chained pass (no inplace mutation):
#  - drop rows that have no invoice number,
#  - strip leading/trailing whitespace from item descriptions,
#  - store invoice numbers as strings so they can be text-matched below.
df = (
    df.dropna(subset=['Invoice'])
    .assign(
        Description=lambda d: d['Description'].str.strip(),
        Invoice=lambda d: d['Invoice'].astype(str),
    )
)

# Drop invoices whose number contains 'C'. In the Online Retail II data a
# leading 'C' marks a cancelled invoice (not a purchase made on credit).
df = df[~df['Invoice'].str.contains('C', regex=False)]


In [None]:
# Distinct country values present in the cleaned data.
df['Country'].unique()

**Creating a basket for transactions done in France**

In [None]:
# Build an invoice x item matrix for France: one row per invoice, one column
# per item description, each cell holding the summed quantity (0 when the
# item does not appear on that invoice).
basket_france = (
    df.loc[df['Country'] == 'France']
    .groupby(['Invoice', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('Invoice')
)

**One-hot encoding, for easy reading of data**

In [None]:
def hot_encode(x):
    """Turn a quantity into a purchase flag.

    Returns 0 when ``x`` is zero or negative, and 1 otherwise.
    """
    return 0 if x <= 0 else 1

**1 denotes that item was bought on the given invoice number**

In [None]:
# Binarise quantities: 1 if the item appears on the invoice with a positive
# quantity, else 0. The vectorised comparison gives the same result as applying
# hot_encode cell by cell (the basket has no NaNs after fillna(0)), without
# relying on DataFrame.applymap, which is deprecated in pandas >= 2.1.
basket_encoded = (basket_france > 0).astype(int)
basket_france = basket_encoded
basket_france.head()

**Building the Apriori Model**

In [None]:
# Mine itemsets that occur in at least 10% of the invoices.
frequent_items = apriori(basket_france, min_support=0.1, use_colnames=True)

# Derive rules with lift >= 1 and rank them by confidence, then lift
# (both descending).
rules = (
    association_rules(frequent_items, metric='lift', min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)

In [None]:
# Show the rule table, ordered by confidence and then lift (both descending).
rules

**From the above 'rules' table we can infer that for e.g.,**

***If the antecedent is '(SET/6 RED SPOTTY PAPER PLATES)' then there is a 91% chance that the consequent is '(SET/6 RED SPOTTY PAPER CUPS)'.***