# Apriori

## Importing the libraries

In [66]:
!pip install apyori

Defaulting to user installation because normal site-packages is not writeable


In [67]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [68]:
dataset = pd.read_csv('data.csv', header = None)

# Build one list of item strings per CSV row.
# - Every column of the file is used, rather than a fixed count of 9, so the
#   cell neither raises on narrower files nor truncates wider ones.
# - Missing cells (short rows padded with NaN by read_csv) are dropped;
#   otherwise str(NaN) would add a bogus 'nan' item to those baskets.
# - Rows with no items are kept as empty lists so the number of transactions
#   still equals the number of rows read.
# - The underlying array is fetched once instead of once per row.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.to_numpy()
]


## Training the Apriori model on the dataset

In [69]:
from apyori import apriori

# Mine association rules from the prepared transactions.
# NOTE(review): `min_length` does not appear to be an option apyori reads
# (only `max_length` is documented), so it is probably ignored -- confirm
# before relying on it.
rules = apriori(
    transactions = transactions,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2,
)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [70]:
# Materialise the rules into a list so that later cells can both display
# them and read them again when building the DataFrame.
results = list(rules)

In [71]:
results

[RelationRecord(items=frozenset({'1.0', '2.0'}), support=0.0392156862745098, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1.0'}), items_add=frozenset({'2.0'}), confidence=0.3333333333333333, lift=4.25), OrderedStatistic(items_base=frozenset({'2.0'}), items_add=frozenset({'1.0'}), confidence=0.5, lift=4.25)]),
 RelationRecord(items=frozenset({'3.0', '1.0'}), support=0.09803921568627451, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1.0'}), items_add=frozenset({'3.0'}), confidence=0.8333333333333334, lift=6.071428571428571), OrderedStatistic(items_base=frozenset({'3.0'}), items_add=frozenset({'1.0'}), confidence=0.7142857142857142, lift=6.07142857142857)]),
 RelationRecord(items=frozenset({'1.0', '6.0'}), support=0.058823529411764705, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1.0'}), items_add=frozenset({'6.0'}), confidence=0.5, lift=3.6428571428571423), OrderedStatistic(items_base=frozenset({'6.0'}), items_add=frozenset({'1.0'}), confidence

### Organising the results into a pandas DataFrame

In [72]:
def inspect(results):
    """Flatten apriori relation records into plain rows for tabulation.

    Only the first ordered statistic (rule) of each record is reported, so
    every record contributes exactly one row.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where
        each ordered statistic is indexable as
        ``(items_base, items_add, confidence, lift)``. Any iterable works,
        including a one-shot generator, because it is traversed only once.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` tuple per record.
        A single-item rule side is returned as that item unchanged; a
        multi-item side is rendered as its items sorted and joined with
        ``', '``; an empty side is rendered as ``''``.

    Raises
    ------
    IndexError
        If a record has no ordered statistics.
    """
    def side_label(items):
        # A set has no defined "first" element, so sort before rendering to
        # keep the output deterministic and to avoid dropping items.
        ordered = sorted(items, key = str)
        if len(ordered) == 1:
            return ordered[0]
        return ', '.join(str(item) for item in ordered)

    rows = []
    for record in results:
        support = record[1]
        first_rule = record[2][0]
        rows.append((
            side_label(first_rule[0]),
            side_label(first_rule[1]),
            support,
            first_rule[2],
            first_rule[3],
        ))
    return rows
# Tabulate the (lhs, rhs, support, confidence, lift) tuples from inspect().
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = [
        'Left Hand Side',
        'Right Hand Side',
        'Support',
        'Confidence',
        'Lift',
    ],
)

### Displaying the unsorted results

In [73]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,1.0,2.0,0.039216,0.333333,4.25
1,1.0,3.0,0.098039,0.833333,6.071429
2,1.0,6.0,0.058824,0.5,3.642857
3,2.0,3.0,0.039216,0.5,3.642857
4,2.0,4.0,0.019608,0.25,4.25
5,2.0,6.0,0.039216,0.5,3.642857
6,3.0,4.0,0.039216,0.285714,4.857143
7,3.0,6.0,0.078431,0.571429,4.163265
8,4.0,6.0,0.058824,1.0,7.285714


### Displaying the results sorted by descending lift

In [74]:
# Show up to 10 rows with the highest 'Lift', largest first.
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
8,4.0,6.0,0.058824,1.0,7.285714
1,1.0,3.0,0.098039,0.833333,6.071429
6,3.0,4.0,0.039216,0.285714,4.857143
0,1.0,2.0,0.039216,0.333333,4.25
4,2.0,4.0,0.019608,0.25,4.25
7,3.0,6.0,0.078431,0.571429,4.163265
2,1.0,6.0,0.058824,0.5,3.642857
3,2.0,3.0,0.039216,0.5,3.642857
5,2.0,6.0,0.039216,0.5,3.642857
