# Association Analysis

### Import the libraries

In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt

### Read the data into a DataFrame from the CSV file

In [None]:
# Load the transactions file; a comma is already read_csv's default delimiter.
df = pd.read_csv('retail_dataset.csv')

### Print top 5 rows 

In [None]:
# Preview the first five rows (five is head's default row count).
df.head()

### Find and print the unique products in the dataset

In [None]:
# Collect the distinct products from every column of the frame, not only
# column '0': the one-hot encoding step builds each basket from the whole
# row, so a product that never appears in the first column must be listed too.
all_values = pd.unique(df.values.ravel())
# Drop missing-value markers so they are not reported as a product
# (presumably shorter transactions are NaN-padded -- confirm against the CSV).
items = all_values[~pd.isna(all_values)]
items

### One Hot Encode the data and print the top 5 rows

In [None]:
# Build one {product: 0/1} record per transaction (one per row of df).
itemset = set(items)
encoded_vals = []
# itertuples yields plain tuples of each row's cell values and avoids
# constructing a Series per row the way iterrows does.
for basket in df.itertuples(index=False, name=None):
    rowset = set(basket)
    # Products absent from this transaction get 0; products present get 1.
    # Absent keys are inserted first so the resulting column order is kept.
    labels = dict.fromkeys(itemset - rowset, 0)
    labels.update(dict.fromkeys(itemset & rowset, 1))
    encoded_vals.append(labels)

# Every record carries all products as keys, so the frame has one column per
# product and one row per transaction.
ohe_df = pd.DataFrame(encoded_vals)
ohe_df.head(5)

### Apply Apriori

The `apriori` function from the mlxtend library provides a fast and efficient implementation of the Apriori algorithm.

In [None]:
# mlxtend's apriori is designed for boolean indicator columns; casting the
# 0/1 one-hot frame avoids its deprecation warning for non-bool input and the
# slower code path that goes with it. The computed supports are unchanged.
# min_support=0.2 keeps itemsets found in at least 20% of the transactions;
# use_colnames=True labels itemsets with product names, not column indices.
freq_items = apriori(ohe_df.astype(bool), min_support=0.2, use_colnames=True, verbose=1)
freq_items.head(12)

### Find the Association Rules

Association rules are if-then associations, each consisting of an antecedent (if) and a consequent (then).

The result of association analysis shows which item is frequently purchased with other items.

In [None]:
# Derive rules from the frequent itemsets, keeping only those whose
# confidence is at least 0.6.
rules = association_rules(
    freq_items,
    metric="confidence",
    min_threshold=0.6,
)
rules.head(12)

## Plot the results

In [None]:
# Scatter every rule by its support (x axis) against its confidence (y axis).
fig, ax = plt.subplots()
ax.scatter(rules['support'], rules['confidence'], alpha=0.5)
ax.set_xlabel('support')
ax.set_ylabel('confidence')
ax.set_title('Support vs Confidence')
plt.show()

In [None]:
# Scatter every rule by its support (x axis) against its lift (y axis).
fig, ax = plt.subplots()
ax.scatter(rules['support'], rules['lift'], alpha=0.5)
ax.set_xlabel('support')
ax.set_ylabel('lift')
ax.set_title('Support vs Lift')
plt.show()

In [None]:
# Scatter every rule by its lift (x axis) against its confidence (y axis).
fig, ax = plt.subplots()
ax.scatter(rules['lift'], rules['confidence'], alpha=0.5)
ax.set_xlabel('lift')
ax.set_ylabel('confidence')
ax.set_title('Lift vs Confidence')
plt.show()

In [None]:
# Fit a degree-1 polynomial (straight line) of confidence as a function of lift.
lift = rules['lift']
confidence = rules['confidence']
fit = np.polyfit(lift, confidence, 1)
fit_fn = np.poly1d(fit)

# Draw the rules as yellow circle markers, then overlay the fitted line.
fig, ax = plt.subplots()
ax.plot(lift, confidence, 'yo')
ax.plot(lift, fit_fn(lift))
ax.set_xlabel('lift')
ax.set_ylabel('confidence')
ax.set_title('Lift vs Confidence')
plt.show()