In [165]:
import pandas as pd
import numpy  as np
import re                              # Used for splitting the Items with multiple delimiters
from   collections import Counter      # To count items in dictionaries
from   itertools   import combinations # To generate combinations of items
import functools

# The dataset was created with the notebook "DatasetCreation.ipynb".

routes = pd.read_csv('route_trips.csv', 
                     delimiter = ',', 
                     encoding  = 'utf-8')

routes.drop(routes.columns[0], 
            axis    = 1, 
            inplace = True)

In [98]:
# Prepare the delimiters for splitting the items
delimiters   = ",", "{", "}", "'"
regexPattern = '|'.join(map(re.escape, delimiters))

In [99]:
# Produce a new column for the Items
newItemsColumn = []
for index in range(0, len(routes)):
    tmp = list(filter(None, re.split(regexPattern, routes.Items[index])))
    newItemsColumn.append([item for item in tmp 
                                   if item.strip()])
routes.Items = newItemsColumn

In [100]:
# Set the threshold for the support of Frequent itemset theory
support = 8
items   = []
items   = [item for items in routes.Items 
                   for item in items]

# Count the occurrencies of each item in the whole dataset
# ("items" now contains a list of all the products for each trip)
itemCount = Counter(items)

# Select only those items that appear more than the support
frequentItems = [(item,) for item in itemCount 
                         if itemCount[item] >= support]

#len(frequentItems)

In [77]:
# Now we remove from the Items those that are not frequentItems
routes.Items = [list(filter(lambda i: i in [f for ft in frequentItems for f in ft], items)) for items in routes.Items]

In [78]:
# Here we generate combinations of 2 frequent items 
combination   = [list(combinations(items, 2)) for items in routes.Items]
comb          = [comb for combs in combination 
                         for comb in combs]

In [79]:
# Count how many pairs there are for each combination
pairCount     = Counter(comb)
# and select only those that appear more than the support
frequentPairs = [comb for comb in pairCount 
                         if pairCount[comb] >= support]

In [80]:
# then sort them for next operations
frequentPairs = [sorted(fp) for fp in frequentPairs]
frequentPairs = list(set([tuple(fp) for fp in frequentPairs]))
#frequentPairs

In [81]:
itemInPairs = set(f for fp in frequentPairs for f in fp)

In [104]:
# Now we remove from the Items those that are not frequentItems
itemsithStep = [list(filter(lambda i: i in itemInPairs, items)) for items in routes.Items]

In [12]:
#def Remove(duplicate): 
#    final_list = [] 
#    for num in duplicate: 
#        if num not in final_list: 
#            final_list.append(num) 
#    return final_list 

In [13]:
# Max depth for frequent tuples
#depth = 6
#frequentTuples = []
#for i in range(2,depth):
#    subset = []
#    for sl in range(0,len(frequentPairs)):
#        tryitems = frequentPairs[sl][:-1]
#        possibleitems = []
#        for asl in frequentPairs[:sl]+frequentPairs[sl+1:]:
#            if (len(set(tryitems).intersection(set(asl))) == len(tryitems)):
#                possibleitems.append(list(set(asl).difference(set(tryitems))))
#        if possibleitems != []:
#            for ssl in list(combinations(frequentPairs[sl], len(frequentPairs[sl])-1)):
#                if ssl != tryitems:
#                    for el in possibleitems:
#                        elem = el
#                        elem.extend(ssl)
#                        c = tuple(set(elem))
#                        if(set(c) in [set(fp) for fp in frequentPairs]):
#                            c = list(c)
#                            c.extend(list(item for item in tryitems))
#                            subset.append(tuple(set(c)))
#    subset = [tuple(s) for s in Remove(list(set(sb) for sb in subset))]
#    combination = [list(combinations(items, i+1)) for items in routes.Items]
#    comb = [tuple(comb) for combs in combination 
#                for comb in combs]
#    tuplesCount = Counter(comb)
    # and select only those that appear more than the support
#    frequentT = [tuples for tuples in tuplesCount 
#                        if tuplesCount[tuples] >= support]
#    frequentTuples.append(frequentT)
#    frequentPairs = frequentT

In [105]:
# Max depth for frequent tuples
depth = 6
frequentTuples = []
for i in range(2,depth):
    subset = []
    combination = [list(combinations(items, i+1)) for items in itemsithStep]
    comb = [tuple(comb) for combs in combination 
                for comb in combs]
    tuplesCount = Counter(comb)
    frequentT = [tuples for tuples in tuplesCount 
                            if tuplesCount[tuples] >= support]
    frequentTuples.append(frequentT)
    itemInTuples = set(f for fp in frequentT for f in fp)
    itemsithStep = [list(filter(lambda i: i in itemInTuples, items)) for items in itemsithStep]

In [109]:
frequentTuples

[[('Frozen-spring-rolls', 'Groceries', 'Snack-bar'),
  ('Malt-vinegar', 'Coleslaw-dressing', 'Helles-schankbier'),
  ('Malt-vinegar', 'Coleslaw-dressing', 'Chocolat-au-lait'),
  ('Malt-vinegar', 'Coleslaw-dressing', 'Soeppoeder'),
  ('Malt-vinegar', 'Chocolat-au-lait', 'Soeppoeder'),
  ('Coleslaw-dressing', 'Helles-schankbier', 'Soeppoeder'),
  ('Coleslaw-dressing', 'Chocolat-au-lait', 'Soeppoeder'),
  ('Mixed-nuts', 'Lime-marmalades', 'Cooking-oil'),
  ('Mixed-nuts', 'Cooking-oil', 'Kartoffelchips'),
  ('Jus-d-orange', 'Crossiants', 'Indian-food'),
  ('Jus-d-orange', 'Indian-food', 'Gelati'),
  ('Jus-d-orange', 'Indian-food', 'Dried-parmesan-cheese'),
  ('Indian-food', 'Gelati', 'Dried-parmesan-cheese'),
  ('Fruit-drinks', 'Frozen-snacks', 'Kauwgom'),
  ('Fruit-drinks', 'Frozen-snacks', 'Coffee-cake'),
  ('Fruit-drinks', 'Kauwgom', 'Coffee-cake'),
  ('Frozen-snacks', 'Kauwgom', 'Coffee-cake'),
  ('Produits-ssans-gluten', 'Kauwgom', 'Coffee-cake'),
  ('Pates-a-tartiner-aux-noisettes-et

In [110]:
# popularTuples will be our container for interesting objects. 
# Now we can find inside popular items and popular pairs but it is going to be extended.
popularTuples = frequentItems
popularTuples.extend(frequentPairs)
for ft in frequentTuples:
    popularTuples.extend(ft)

#popularTuples

In [155]:
def matches(rid, order, popularTuples):
    itemList   = []
    orderItems = routes.Items[(routes.RouteId == rid) & 
                              (routes.Order   == order)
                             ].tolist()[0]
    for obj in popularTuples:
        # If the object is a pair (so it is saved as list)
        check = True
        for item in obj:
            if item not in orderItems:
                check = False
        if check:
            itemList.append(list(obj))
    return itemList
    

def nextTuple(rid, order, popularTuples):
    # If the next does exist
    if(not routes[(routes.RouteId == rid) & 
                  (routes.Order   == order)
                 ].empty):
        return matches(rid, order, popularTuples)
    else:
    # Otherwise return an empty list
        return []

def divideOrder(keys, popularTuples):
    route = -1
    order = -1
    itemsOrdered = []
    itemsBrown = []
    for key in keys:
        if(key[0] == route):
            t = nextTuple(key[0], key[1], popularTuples)
            for i in range(0, key[1]-order+1):
                itemsBrown.append(tuple([i,t]))
        else:
            if route != -1:
                itemsOrdered.extend(itemsBrown)
                itemsBrown = []
            route = key[0]
            order = key[1]
            t = nextTuple(key[0], key[1], popularTuples)
            itemsBrown.append(tuple([0,t]))
    itemsOrdered.extend(itemsBrown)
    return itemsOrdered
            
    
def searchNext(tuples, popularTuples):
    keys      = [(routes.RouteId[row], routes.Order[row]) 
                 for row in range(0, len(routes)) 
                     if set(tuples).intersection(set(routes.Items[row])) == set(tuples)]
    newTuples = divideOrder(keys, popularTuples)
    return newTuples

In [156]:
popularPatterns = [(pop, searchNext(pop, popularTuples)) for pop in popularTuples]
#popularPatterns

In [198]:
# Limit of classification for frequent Patterns
limit = 10
patterns = []
for pattern in popularPatterns:
    patternLevel = [[(x[0], y), 1] for x in pattern[1] for y in x[1]]
    for lvl1 in range(0,len(patternLevel)):
        for lvl2 in range(0,len(patternLevel)):
            if (patternLevel[lvl1][0] == patternLevel[lvl2][0]) & (lvl1 != lvl2):
                patternLevel[lvl1][1] += patternLevel[lvl2][1]
                patternLevel[lvl2][1] = 0
    finalPattern = []
    for i in range(0,len(patternLevel)):
        if patternLevel[i][1] > limit:
            finalPattern.append(patternLevel[i])
    patterns.append((pattern[0],finalPattern))

In [199]:
patterns

[(('Scottish-shortbread',), []),
 (('Olives-denoyautees',), []),
 (('Miels',), [[(0, ['Miels']), 16], [(1, ['Miels']), 11]]),
 (('Pasteurized-milk',), [[(0, ['Pasteurized-milk']), 14]]),
 (('Indian-food-mixes',), []),
 (('Fat-free-milk',), [[(0, ['Fat-free-milk']), 11]]),
 (('Brussels-pate',),
  [[(0, ['White-plain-flour']), 13],
   [(0, ['Brussels-pate', 'White-plain-flour']), 13],
   [(1, ['Brussels-pate']), 17],
   [(2, ['Brussels-pate']), 12],
   [(0, ['Brussels-pate']), 24],
   [(0, ['Chicken-thighs']), 12],
   [(0, ['Brussels-pate', 'Chicken-thighs']), 12]]),
 (('Balsamic-vinaigrette',), []),
 (('Organic-tomatoes',), [[(0, ['Organic-tomatoes']), 13]]),
 (('Pesce-curado',), [[(0, ['Pesce-curado']), 15]]),
 (('White-coconut-chocolate',),
  [[(0, ['Syrups']), 11],
   [(0, ['Strawberry-licorice']), 12],
   [(0, ['Strawberry-licorice', 'White-coconut-chocolate']), 12],
   [(0, ['Syrups', 'White-coconut-chocolate']), 11],
   [(1, ['White-coconut-chocolate']), 12],
   [(0, ['White-cocon

In [164]:
[((x[0], y), 1) for x in popularPatterns[0][1] for y in x[1]]

[((0, ['Scottish-shortbread']), 1),
 ((0, ['Olives-denoyautees']), 1),
 ((0, ['Miels']), 1),
 ((0, ['Pasteurized-milk']), 1),
 ((0, ['Indian-food-mixes']), 1),
 ((0, ['Fat-free-milk']), 1),
 ((0, ['Brussels-pate']), 1),
 ((0, ['Balsamic-vinaigrette']), 1),
 ((0, ['Organic-tomatoes']), 1),
 ((0, ['Pesce-curado']), 1),
 ((0, ['Scottish-shortbread']), 1),
 ((0, ['Olives-denoyautees']), 1),
 ((0, ['Miels']), 1),
 ((0, ['Pasteurized-milk']), 1),
 ((0, ['Fat-free-milk']), 1),
 ((0, ['Brussels-pate']), 1),
 ((0, ['Balsamic-vinaigrette']), 1),
 ((0, ['Organic-tomatoes']), 1),
 ((0, ['White-coconut-chocolate']), 1),
 ((0, ['Aliments-d-origine-vegetale']), 1),
 ((0, ['Brussels-pate', 'White-coconut-chocolate']), 1),
 ((1, ['Scottish-shortbread']), 1),
 ((1, ['Olives-denoyautees']), 1),
 ((1, ['Miels']), 1),
 ((1, ['Pasteurized-milk']), 1),
 ((1, ['Fat-free-milk']), 1),
 ((1, ['Brussels-pate']), 1),
 ((1, ['Balsamic-vinaigrette']), 1),
 ((1, ['Organic-tomatoes']), 1),
 ((1, ['White-coconut-chocol

In [157]:
popularPatterns[0]

(('Scottish-shortbread',),
 [(0,
   [['Scottish-shortbread'],
    ['Olives-denoyautees'],
    ['Miels'],
    ['Pasteurized-milk'],
    ['Indian-food-mixes'],
    ['Fat-free-milk'],
    ['Brussels-pate'],
    ['Balsamic-vinaigrette'],
    ['Organic-tomatoes'],
    ['Pesce-curado']]),
  (0,
   [['Scottish-shortbread'],
    ['Olives-denoyautees'],
    ['Miels'],
    ['Pasteurized-milk'],
    ['Fat-free-milk'],
    ['Brussels-pate'],
    ['Balsamic-vinaigrette'],
    ['Organic-tomatoes'],
    ['White-coconut-chocolate'],
    ['Aliments-d-origine-vegetale'],
    ['Brussels-pate', 'White-coconut-chocolate']]),
  (1,
   [['Scottish-shortbread'],
    ['Olives-denoyautees'],
    ['Miels'],
    ['Pasteurized-milk'],
    ['Fat-free-milk'],
    ['Brussels-pate'],
    ['Balsamic-vinaigrette'],
    ['Organic-tomatoes'],
    ['White-coconut-chocolate'],
    ['Aliments-d-origine-vegetale'],
    ['Brussels-pate', 'White-coconut-chocolate']]),
  (0,
   [['Scottish-shortbread'],
    ['Olives-denoyautees'