In [9]:
import numpy as np
from itertools import combinations
from functools import reduce

In [16]:
# Every row is one transaction; every entry in a row is an item bought.
# The same item may appear more than once within a single transaction
# (e.g. 'Onion' in the last row).
# The rows have different lengths, so dtype=object is required: it yields a
# 1-D array of Python lists. Without it, recent NumPy releases refuse to
# build an array from ragged nested sequences.
items = np.array([['Mango', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo'],
                  ['Doll', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo'],
                  ['Mango', 'Apple', 'Key-chain', 'Eggs'],
                  ['Mango', 'Umbrella', 'Corn', 'Key-chain', 'Yo-yo'],
                  ['Corn', 'Onion', 'Onion', 'Key-chain', 'Ice-cream', 'Eggs']],
                 dtype=object)

# Alternative toy dataset:
# items = np.array([['apple', 'beer', 'rice', 'meat'],
#                   ['apple', 'beer', 'rice'],
#                   ['apple', 'beer'],
#                   ['apple', 'pear'],
#                   ['milk', 'beer', 'rice', 'meat'],
#                   ['milk', 'beer', 'rice'],
#                   ['milk', 'beer'],
#                   ['milk', 'pear']], dtype=object)
items

array([list(['Mango', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo']),
       list(['Doll', 'Onion', 'Nintendo', 'Key-chain', 'Eggs', 'Yo-yo']),
       list(['Mango', 'Apple', 'Key-chain', 'Eggs']),
       list(['Mango', 'Umbrella', 'Corn', 'Key-chain', 'Yo-yo']),
       list(['Corn', 'Onion', 'Onion', 'Key-chain', 'Ice-cream', 'Eggs'])], dtype=object)

In [11]:
def support(items, transactions):
    '''Return the fraction of transactions that contain every item in `items`.

    Example: with 8 transactions, 4 of which contain an apple,
    support(['apple'], transactions) is 4 / 8 = 0.5.

    Parameters:
    ----------
    items: sequence, [item1, item2]
        The itemset to look for. A transaction only counts when it
        contains all of these items.
    transactions: sequence of sequences, [[item1, item2], [item1, item2, item2], ...]
        The baskets to search. Only membership is tested, so an item
        repeated inside one basket is counted once.

    Returns:
    --------
    score: float
        Number of matching transactions divided by the total number of
        transactions. 0.0 when there are no transactions at all.
    '''
    total = len(transactions)
    # Compare the length explicitly instead of using truthiness:
    # `transactions` may be a NumPy array, whose truth value is ambiguous.
    if total == 0:
        return 0.0
    matches = sum(1 for transaction in transactions
                  if all(item in transaction for item in items))
    return matches / total

support(['apple'], items)

0.0

In [12]:
def confidence(X, y, transactions):
    '''Estimate how likely item y is bought given that item X is bought.

    Computed as support({X, y}) / support({X}), i.e. the share of the
    transactions containing X that also contain y. Returns 0 when X does
    not occur in any transaction.'''
    support_X = support([X], transactions)
    if support_X == 0:
        return 0
    support_both = support([X, y], transactions)
    return support_both / support_X

# confidence('apple', 'beer', items)    

In [13]:
def lift(X, y, transactions):
    '''Measure how much buying item X raises the chance of buying item y,
    corrected for how popular y is on its own.

    Computed as support({X, y}) / (support({X}) * support({y})). A value
    above 1 suggests y tends to be bought together with X; a value below 1
    suggests it tends not to be. Returns 0 when either item never occurs.'''
    joint = support([X, y], transactions)
    expected = support([X], transactions) * support([y], transactions)
    return joint / expected if expected != 0 else 0

# lift('apple', 'beer', items)

In [14]:
def first(tuples):
    '''Return a list with the first element of every 2-tuple in `tuples`.'''
    heads = []
    for head, _tail in tuples:
        heads.append(head)
    return heads

def second(tuples):
    '''Return a list with the second element of every 2-tuple in `tuples`.'''
    tails = []
    for _head, tail in tuples:
        tails.append(tail)
    return tails

def array_2d(array_1d):
    '''Turn a flat sequence into a list of one-element lists,
    e.g. ['a', 'b'] -> [['a'], ['b']].'''
    # Reshape to a single column, then convert back to nested Python lists.
    column = np.reshape(array_1d, (-1, 1))
    return column.tolist()

def flatten(array_2d):
    '''Concatenate the rows of `array_2d` into one flat list.

    Example: [['a', 'b'], ['c']] -> ['a', 'b', 'c'].
    Works on the argument that is passed in (not on any global), keeps
    duplicates and order, and returns [] for empty input.'''
    return [element for row in array_2d for element in row]

def unique(array):
    '''Return the distinct elements of `array` as a list, in first-seen order.

    Elements must be hashable. Keeping insertion order (rather than going
    through a set) makes the result identical from run to run, so the
    notebook's output is reproducible.'''
    return list(dict.fromkeys(array))

def support_scores(items, transactions):
    '''Pair every itemset in `items` with its support in `transactions`.

    Returns a list of (itemset, support) tuples in the same order as
    `items`.'''
    scored = []
    for itemset in items:
        scored.append((itemset, support(itemset, transactions)))
    return scored

In [15]:
def apriori(items, min_score = 0.25, min_items = 3, epochs = 10):
    '''Mine frequent itemsets level by level and derive recommendations.

    Parameters:
    ----------
    items: sequence of sequences
        The transactions; each one is a list of purchased items.
    min_score: float
        Minimum support an itemset needs in order to be kept.
    min_items: int
        Recommendations are only derived from itemsets whose size is
        strictly greater than this value, i.e. for already-bought baskets
        holding at least `min_items` items.
    epochs: int
        Upper bound (exclusive) on the itemset size explored; sizes
        2 .. epochs - 1 are tried.

    Returns:
    --------
    rec: dict
        Maps frozenset(bought items) -> (recommended item, score), where
        score is the support of the bought items together with the
        recommended item. If several recommendations are found for the
        same basket, the one found last is kept.
    '''
    out = {}
    rec = {}

    # Level 1: score every distinct item on its own and keep the frequent ones.
    # Uses the same `>=` threshold test as the larger itemsets below.
    single_items = array_2d(unique(flatten(items)))
    single_items_scores = support_scores(single_items, items)
    single_items_scores = [(i, score) for (i, score) in single_items_scores 
                           if score >= min_score]
    out[1] = single_items_scores
    
    for i in range(2, epochs):
        if out.get(i - 1) is None:
            break
        unique_items = first(out[i - 1])

        # Nothing survived the previous level (e.g. no single item reached
        # min_score), so there is nothing left to combine.
        if not unique_items:
            break

        # The previous level stores itemsets as lists; collapse them into
        # the distinct items they are made of.
        if isinstance(unique_items[0], list):
            unique_items = unique(flatten(unique_items))
        
        # Build every candidate itemset of size i and keep the frequent ones
        itemsets = list(combinations(unique_items, r = i))
        scores = support_scores(itemsets, items)
        scores_min = [(list(item), score) for (item, score) in scores 
                      if score >= min_score]
        
        if len(scores_min) == 0:
            break
        out[i] = scores_min
        
        # Only derive recommendations once the itemsets are larger than
        # min_items, i.e. the bought basket (size i - 1) has at least
        # min_items items.
        if i > min_items:
            candidates = []
            previous_scores = out[i - 1]
            for j, _ in scores_min:
                # j is the larger (size i) frequent itemset
                j_fs = frozenset(j)
                
                for k, _ in previous_scores:
                    # k is a frequent itemset from the previous level
                    k_fs = frozenset(k)
                    
                    # Items in j that are missing from k are the possible
                    # recommendations; there may be more than one when k is
                    # not a subset of j.
                    recommendations = list(j_fs.difference(k_fs))
                    
                    for recommendation in recommendations:
                        # Basket obtained by adding this one item to k
                        new_recs = k_fs.union(frozenset([recommendation]))
                        candidates.append((k, new_recs, recommendation))

            # Keep a candidate only if the resulting basket is itself one of
            # the frequent itemsets of this level; record that set's support.
            for bought_items, current_basket, recommendation in candidates:
                for target_basket, score in scores_min:
                    if current_basket == frozenset(target_basket):
                        rec[frozenset(bought_items)] = (recommendation, score)
    return rec

    
# Mine recommendations with a minimum support of 0.25 and print one
# human-readable line pair per already-purchased basket.
scores = apriori(items, 0.25)
for i, (recommended_items, score) in scores.items():
    purchased_items = ', '.join(i)
    print('If you buy {}, you might like {}.'.format(purchased_items, recommended_items))
    print('- Score: {}'.format(score))
    print()
# Prediction
# item, score = scores[frozenset(['Onion', 'Yo-yo', 'Key-chain'])]
# print('If you purchase Onion and Yo-yo, you might like {}. Score = {}'.format(item, score))

If you buy Yo-yo, Nintendo, Key-chain, you might like Onion.
- Score: 0.4

If you buy Yo-yo, Nintendo, Eggs, you might like Onion.
- Score: 0.4

If you buy Yo-yo, Nintendo, Onion, you might like Eggs.
- Score: 0.4

If you buy Yo-yo, Eggs, Key-chain, you might like Onion.
- Score: 0.4

If you buy Yo-yo, Onion, Key-chain, you might like Eggs.
- Score: 0.4

If you buy Yo-yo, Eggs, Onion, you might like Nintendo.
- Score: 0.4

If you buy Key-chain, Nintendo, Eggs, you might like Onion.
- Score: 0.4

If you buy Key-chain, Nintendo, Onion, you might like Eggs.
- Score: 0.4

If you buy Nintendo, Onion, Eggs, you might like Key-chain.
- Score: 0.4

If you buy Key-chain, Eggs, Onion, you might like Nintendo.
- Score: 0.4

If you buy Yo-yo, Nintendo, Eggs, Key-chain, you might like Onion.
- Score: 0.4

If you buy Yo-yo, Nintendo, Onion, Key-chain, you might like Eggs.
- Score: 0.4

If you buy Yo-yo, Nintendo, Onion, Eggs, you might like Key-chain.
- Score: 0.4

If you buy Yo-yo, Eggs, Onion, Key